hZzy's picture
Model save
8781e61 verified
raw
history blame
96.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.992914501653283,
"eval_steps": 50,
"global_step": 1056,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.002834199338686821,
"grad_norm": 26513.418063359753,
"learning_rate": 4.716981132075472e-08,
"logits": -1.2867579460144043,
"logps": -84.34933471679688,
"loss": 458.1349,
"objective": 431.1807556152344,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3618059456348419,
"step": 1
},
{
"dpo_loss": 0.6925470232963562,
"epoch": 0.014170996693434105,
"grad_norm": 25616.845196611706,
"learning_rate": 2.358490566037736e-07,
"logits": -1.4298049211502075,
"logps": -83.76395416259766,
"loss": 481.1315,
"objective": 456.7127990722656,
"ranking_simple": 0.4895833432674408,
"regularize": 0.40281566977500916,
"step": 5
},
{
"dpo_loss": 0.6908925175666809,
"epoch": 0.02834199338686821,
"grad_norm": 25634.63923836155,
"learning_rate": 4.716981132075472e-07,
"logits": -1.3988193273544312,
"logps": -84.28076171875,
"loss": 465.4449,
"objective": 457.4544982910156,
"ranking_simple": 0.574999988079071,
"regularize": 0.4003960192203522,
"step": 10
},
{
"dpo_loss": 0.6889466643333435,
"epoch": 0.042512990080302314,
"grad_norm": 25839.066108054885,
"learning_rate": 7.075471698113208e-07,
"logits": -1.5292092561721802,
"logps": -83.47270202636719,
"loss": 471.5689,
"objective": 480.5885314941406,
"ranking_simple": 0.5666666626930237,
"regularize": 0.4446539282798767,
"step": 15
},
{
"dpo_loss": 0.6812318563461304,
"epoch": 0.05668398677373642,
"grad_norm": 27814.64921945046,
"learning_rate": 9.433962264150944e-07,
"logits": -1.368198275566101,
"logps": -82.4678726196289,
"loss": 477.3994,
"objective": 495.29437255859375,
"ranking_simple": 0.48750001192092896,
"regularize": 0.43586504459381104,
"step": 20
},
{
"dpo_loss": 0.681931734085083,
"epoch": 0.07085498346717052,
"grad_norm": 23147.9179925769,
"learning_rate": 1.179245283018868e-06,
"logits": -1.3939018249511719,
"logps": -82.79817962646484,
"loss": 465.0164,
"objective": 451.0166320800781,
"ranking_simple": 0.5708333253860474,
"regularize": 0.39363664388656616,
"step": 25
},
{
"dpo_loss": 0.6791452765464783,
"epoch": 0.08502598016060463,
"grad_norm": 20393.372937507134,
"learning_rate": 1.4150943396226415e-06,
"logits": -1.4756665229797363,
"logps": -83.33882141113281,
"loss": 460.6366,
"objective": 463.89178466796875,
"ranking_simple": 0.4833333194255829,
"regularize": 0.4173465967178345,
"step": 30
},
{
"dpo_loss": 0.6747857332229614,
"epoch": 0.09919697685403873,
"grad_norm": 23418.03953630638,
"learning_rate": 1.650943396226415e-06,
"logits": -1.4623119831085205,
"logps": -81.88738250732422,
"loss": 469.5609,
"objective": 465.93438720703125,
"ranking_simple": 0.5375000238418579,
"regularize": 0.3794897198677063,
"step": 35
},
{
"dpo_loss": 0.6748687028884888,
"epoch": 0.11336797354747284,
"grad_norm": 23935.328029038144,
"learning_rate": 1.8867924528301889e-06,
"logits": -1.4446643590927124,
"logps": -82.75718688964844,
"loss": 464.1251,
"objective": 463.13604736328125,
"ranking_simple": 0.5041666626930237,
"regularize": 0.3904489278793335,
"step": 40
},
{
"dpo_loss": 0.6506677865982056,
"epoch": 0.12753897024090693,
"grad_norm": 21652.885906992553,
"learning_rate": 2.1226415094339624e-06,
"logits": -1.4655033349990845,
"logps": -82.44393157958984,
"loss": 449.3847,
"objective": 443.1551818847656,
"ranking_simple": 0.5249999761581421,
"regularize": 0.39696159958839417,
"step": 45
},
{
"dpo_loss": 0.6506399512290955,
"epoch": 0.14170996693434104,
"grad_norm": 21470.261560892,
"learning_rate": 2.358490566037736e-06,
"logits": -1.4927986860275269,
"logps": -86.20177459716797,
"loss": 470.2434,
"objective": 469.0955810546875,
"ranking_simple": 0.5041666626930237,
"regularize": 0.4380335211753845,
"step": 50
},
{
"epoch": 0.14170996693434104,
"eval_dpo_loss": 0.6872959136962891,
"eval_logits": -1.4968135356903076,
"eval_logps": -94.33769226074219,
"eval_loss": 491.6701965332031,
"eval_objective": 488.937255859375,
"eval_ranking_simple": 0.5269151329994202,
"eval_regularize": 0.4298084080219269,
"eval_runtime": 367.8725,
"eval_samples_per_second": 15.739,
"eval_steps_per_second": 1.313,
"step": 50
},
{
"dpo_loss": 0.6506890654563904,
"epoch": 0.15588096362777515,
"grad_norm": 19949.76098202467,
"learning_rate": 2.5943396226415095e-06,
"logits": -1.470070719718933,
"logps": -87.6925277709961,
"loss": 445.3158,
"objective": 479.81256103515625,
"ranking_simple": 0.5166666507720947,
"regularize": 0.3883524239063263,
"step": 55
},
{
"dpo_loss": 0.644140362739563,
"epoch": 0.17005196032120926,
"grad_norm": 19296.57503340127,
"learning_rate": 2.830188679245283e-06,
"logits": -1.3877280950546265,
"logps": -84.86709594726562,
"loss": 439.9873,
"objective": 439.1716613769531,
"ranking_simple": 0.5375000238418579,
"regularize": 0.4104911684989929,
"step": 60
},
{
"dpo_loss": 0.6160763502120972,
"epoch": 0.18422295701464336,
"grad_norm": 19281.315668093266,
"learning_rate": 3.0660377358490567e-06,
"logits": -1.431338906288147,
"logps": -79.60600280761719,
"loss": 440.686,
"objective": 418.4104309082031,
"ranking_simple": 0.5666666626930237,
"regularize": 0.36962300539016724,
"step": 65
},
{
"dpo_loss": 0.6181739568710327,
"epoch": 0.19839395370807747,
"grad_norm": 18809.239218176448,
"learning_rate": 3.30188679245283e-06,
"logits": -1.4349617958068848,
"logps": -82.71049499511719,
"loss": 439.5741,
"objective": 443.4054870605469,
"ranking_simple": 0.550000011920929,
"regularize": 0.4068828225135803,
"step": 70
},
{
"dpo_loss": 0.6287192702293396,
"epoch": 0.21256495040151158,
"grad_norm": 20616.517476343855,
"learning_rate": 3.5377358490566038e-06,
"logits": -1.333349585533142,
"logps": -82.84730529785156,
"loss": 457.74,
"objective": 467.5802307128906,
"ranking_simple": 0.5625,
"regularize": 0.42490166425704956,
"step": 75
},
{
"dpo_loss": 0.6181908249855042,
"epoch": 0.22673594709494568,
"grad_norm": 20195.88685314973,
"learning_rate": 3.7735849056603777e-06,
"logits": -1.3842099905014038,
"logps": -80.95723724365234,
"loss": 435.7331,
"objective": 441.63299560546875,
"ranking_simple": 0.5833333134651184,
"regularize": 0.42700326442718506,
"step": 80
},
{
"dpo_loss": 0.6087173819541931,
"epoch": 0.2409069437883798,
"grad_norm": 15942.080314795965,
"learning_rate": 4.009433962264152e-06,
"logits": -1.344446063041687,
"logps": -80.02598571777344,
"loss": 443.4859,
"objective": 464.4291076660156,
"ranking_simple": 0.5666666626930237,
"regularize": 0.45569831132888794,
"step": 85
},
{
"dpo_loss": 0.5819770693778992,
"epoch": 0.25507794048181387,
"grad_norm": 16092.557140676014,
"learning_rate": 4.245283018867925e-06,
"logits": -1.4104372262954712,
"logps": -81.34741973876953,
"loss": 449.1501,
"objective": 444.44000244140625,
"ranking_simple": 0.6041666865348816,
"regularize": 0.4178715646266937,
"step": 90
},
{
"dpo_loss": 0.5937064290046692,
"epoch": 0.269248937175248,
"grad_norm": 16918.089764873057,
"learning_rate": 4.481132075471699e-06,
"logits": -1.3456578254699707,
"logps": -83.11612701416016,
"loss": 458.477,
"objective": 447.3981018066406,
"ranking_simple": 0.5458333492279053,
"regularize": 0.41778501868247986,
"step": 95
},
{
"dpo_loss": 0.6062127351760864,
"epoch": 0.2834199338686821,
"grad_norm": 16311.430045391364,
"learning_rate": 4.716981132075472e-06,
"logits": -1.3139296770095825,
"logps": -79.3995132446289,
"loss": 444.0833,
"objective": 441.1212463378906,
"ranking_simple": 0.5625,
"regularize": 0.4033554494380951,
"step": 100
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6839331984519958,
"eval_logits": -1.4344693422317505,
"eval_logps": -85.09794616699219,
"eval_loss": 519.0431518554688,
"eval_objective": 504.62091064453125,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 0.4692068099975586,
"eval_runtime": 368.2123,
"eval_samples_per_second": 15.725,
"eval_steps_per_second": 1.312,
"step": 100
},
{
"dpo_loss": 0.6278888583183289,
"epoch": 0.2975909305621162,
"grad_norm": 16121.92097886621,
"learning_rate": 4.952830188679246e-06,
"logits": -1.4997217655181885,
"logps": -80.34195709228516,
"loss": 464.4937,
"objective": 497.4685363769531,
"ranking_simple": 0.5874999761581421,
"regularize": 0.5123757123947144,
"step": 105
},
{
"dpo_loss": 0.6059397459030151,
"epoch": 0.3117619272555503,
"grad_norm": 14461.937244168814,
"learning_rate": 4.999781286194085e-06,
"logits": -1.379300832748413,
"logps": -78.7112045288086,
"loss": 472.4527,
"objective": 461.8623046875,
"ranking_simple": 0.5333333611488342,
"regularize": 0.47747528553009033,
"step": 110
},
{
"dpo_loss": 0.6304748058319092,
"epoch": 0.32593292394898443,
"grad_norm": 15822.122754431652,
"learning_rate": 4.998892826944418e-06,
"logits": -1.303352952003479,
"logps": -76.60855102539062,
"loss": 466.8177,
"objective": 472.0991516113281,
"ranking_simple": 0.5874999761581421,
"regularize": 0.48390674591064453,
"step": 115
},
{
"dpo_loss": 0.5859030485153198,
"epoch": 0.3401039206424185,
"grad_norm": 14414.401646390268,
"learning_rate": 4.997321195347154e-06,
"logits": -1.2155264616012573,
"logps": -78.24824523925781,
"loss": 478.7331,
"objective": 465.13909912109375,
"ranking_simple": 0.574999988079071,
"regularize": 0.44416898488998413,
"step": 120
},
{
"dpo_loss": 0.5911449790000916,
"epoch": 0.35427491733585265,
"grad_norm": 12964.656141234129,
"learning_rate": 4.9950668210706795e-06,
"logits": -1.202903151512146,
"logps": -79.2585220336914,
"loss": 448.101,
"objective": 439.24993896484375,
"ranking_simple": 0.5541666746139526,
"regularize": 0.4476715922355652,
"step": 125
},
{
"dpo_loss": 0.6120374202728271,
"epoch": 0.3684459140292867,
"grad_norm": 15154.178723672056,
"learning_rate": 4.992130320438411e-06,
"logits": -1.1208935976028442,
"logps": -78.75408172607422,
"loss": 491.5896,
"objective": 508.3560485839844,
"ranking_simple": 0.6000000238418579,
"regularize": 0.507615864276886,
"step": 130
},
{
"dpo_loss": 0.5841532349586487,
"epoch": 0.3826169107227208,
"grad_norm": 13801.6700914244,
"learning_rate": 4.988512496260302e-06,
"logits": -1.0719252824783325,
"logps": -79.74116516113281,
"loss": 457.2002,
"objective": 470.0576477050781,
"ranking_simple": 0.5458333492279053,
"regularize": 0.423565536737442,
"step": 135
},
{
"dpo_loss": 0.6198513507843018,
"epoch": 0.39678790741615494,
"grad_norm": 15864.353210882817,
"learning_rate": 4.984214337613357e-06,
"logits": -1.0588831901550293,
"logps": -78.39877319335938,
"loss": 473.3348,
"objective": 488.9702453613281,
"ranking_simple": 0.5666666626930237,
"regularize": 0.49831974506378174,
"step": 140
},
{
"dpo_loss": 0.6141570806503296,
"epoch": 0.410958904109589,
"grad_norm": 13855.012204270928,
"learning_rate": 4.979237019571235e-06,
"logits": -1.0990999937057495,
"logps": -81.36931610107422,
"loss": 472.2201,
"objective": 467.56060791015625,
"ranking_simple": 0.6208333373069763,
"regularize": 0.4914819896221161,
"step": 145
},
{
"dpo_loss": 0.6057680249214172,
"epoch": 0.42512990080302315,
"grad_norm": 12908.639969406,
"learning_rate": 4.97358190288299e-06,
"logits": -1.0687533617019653,
"logps": -82.37923431396484,
"loss": 462.7395,
"objective": 476.6890869140625,
"ranking_simple": 0.5666666626930237,
"regularize": 0.5104095935821533,
"step": 150
},
{
"epoch": 0.42512990080302315,
"eval_dpo_loss": 0.6978219151496887,
"eval_logits": -1.114176630973816,
"eval_logps": -85.36813354492188,
"eval_loss": 552.14501953125,
"eval_objective": 536.3590698242188,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 0.530274510383606,
"eval_runtime": 367.6708,
"eval_samples_per_second": 15.748,
"eval_steps_per_second": 1.314,
"step": 150
},
{
"dpo_loss": 0.583582878112793,
"epoch": 0.43930089749645723,
"grad_norm": 13162.177891801954,
"learning_rate": 4.967250533601059e-06,
"logits": -1.109771966934204,
"logps": -79.27082824707031,
"loss": 471.5066,
"objective": 491.6888122558594,
"ranking_simple": 0.5625,
"regularize": 0.5010552406311035,
"step": 155
},
{
"dpo_loss": 0.5951789021492004,
"epoch": 0.45347189418989137,
"grad_norm": 13529.150362850014,
"learning_rate": 4.9602446426585845e-06,
"logits": -0.9521434307098389,
"logps": -78.37739562988281,
"loss": 462.6479,
"objective": 443.32379150390625,
"ranking_simple": 0.5625,
"regularize": 0.43846720457077026,
"step": 160
},
{
"dpo_loss": 0.5964730381965637,
"epoch": 0.46764289088332545,
"grad_norm": 12752.905142046688,
"learning_rate": 4.952566145396197e-06,
"logits": -0.9862248301506042,
"logps": -77.13455963134766,
"loss": 478.3803,
"objective": 439.1322021484375,
"ranking_simple": 0.5916666388511658,
"regularize": 0.4469877779483795,
"step": 165
},
{
"dpo_loss": 0.6038484573364258,
"epoch": 0.4818138875767596,
"grad_norm": 10136.08564624282,
"learning_rate": 4.944217141038379e-06,
"logits": -0.9612207412719727,
"logps": -76.84504699707031,
"loss": 464.1854,
"objective": 468.0538635253906,
"ranking_simple": 0.5375000238418579,
"regularize": 0.48102495074272156,
"step": 170
},
{
"dpo_loss": 0.6333703994750977,
"epoch": 0.49598488427019366,
"grad_norm": 12679.513797703847,
"learning_rate": 4.935199912119558e-06,
"logits": -0.8710211515426636,
"logps": -78.75601959228516,
"loss": 464.1875,
"objective": 483.89111328125,
"ranking_simple": 0.5458333492279053,
"regularize": 0.514005720615387,
"step": 175
},
{
"dpo_loss": 0.5934690833091736,
"epoch": 0.5101558809636277,
"grad_norm": 11848.627128456088,
"learning_rate": 4.925516923860083e-06,
"logits": -0.8811076879501343,
"logps": -79.12806701660156,
"loss": 454.5442,
"objective": 488.2110900878906,
"ranking_simple": 0.5333333611488342,
"regularize": 0.4675270617008209,
"step": 180
},
{
"dpo_loss": 0.5724626183509827,
"epoch": 0.5243268776570619,
"grad_norm": 11713.664064429433,
"learning_rate": 4.9151708234922605e-06,
"logits": -0.877926230430603,
"logps": -78.4993667602539,
"loss": 458.0594,
"objective": 457.6112060546875,
"ranking_simple": 0.5666666626930237,
"regularize": 0.4596666693687439,
"step": 185
},
{
"dpo_loss": 0.5861265659332275,
"epoch": 0.538497874350496,
"grad_norm": 11338.428858471121,
"learning_rate": 4.904164439536626e-06,
"logits": -0.9104651808738708,
"logps": -78.65680694580078,
"loss": 487.9273,
"objective": 496.1139221191406,
"ranking_simple": 0.6083333492279053,
"regularize": 0.48734599351882935,
"step": 190
},
{
"dpo_loss": 0.555807888507843,
"epoch": 0.5526688710439301,
"grad_norm": 12189.271370638136,
"learning_rate": 4.8925007810286555e-06,
"logits": -0.8752073645591736,
"logps": -79.23562622070312,
"loss": 470.8679,
"objective": 470.63421630859375,
"ranking_simple": 0.612500011920929,
"regularize": 0.4960786998271942,
"step": 195
},
{
"dpo_loss": 0.5812997817993164,
"epoch": 0.5668398677373642,
"grad_norm": 11198.141597437847,
"learning_rate": 4.880183036696123e-06,
"logits": -0.8242141008377075,
"logps": -78.40170288085938,
"loss": 445.5849,
"objective": 460.94390869140625,
"ranking_simple": 0.5541666746139526,
"regularize": 0.44002261757850647,
"step": 200
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.706484317779541,
"eval_logits": -0.8469271063804626,
"eval_logps": -81.4329833984375,
"eval_loss": 561.5618896484375,
"eval_objective": 550.3474731445312,
"eval_ranking_simple": 0.533643901348114,
"eval_regularize": 0.5524560809135437,
"eval_runtime": 367.805,
"eval_samples_per_second": 15.742,
"eval_steps_per_second": 1.313,
"step": 200
},
{
"dpo_loss": 0.584477961063385,
"epoch": 0.5810108644307983,
"grad_norm": 11490.315926548616,
"learning_rate": 4.867214574087338e-06,
"logits": -0.6800127625465393,
"logps": -77.10325622558594,
"loss": 450.6568,
"objective": 437.6501159667969,
"ranking_simple": 0.5874999761581421,
"regularize": 0.45991986989974976,
"step": 205
},
{
"dpo_loss": 0.580778181552887,
"epoch": 0.5951818611242324,
"grad_norm": 12820.159548843532,
"learning_rate": 4.853598938650487e-06,
"logits": -0.7779163718223572,
"logps": -75.716552734375,
"loss": 486.1006,
"objective": 472.6957702636719,
"ranking_simple": 0.5958333611488342,
"regularize": 0.47449955344200134,
"step": 210
},
{
"dpo_loss": 0.5629280805587769,
"epoch": 0.6093528578176665,
"grad_norm": 12199.139102166855,
"learning_rate": 4.8393398527643495e-06,
"logits": -0.9319173097610474,
"logps": -74.01848602294922,
"loss": 450.7458,
"objective": 442.0367736816406,
"ranking_simple": 0.5541666746139526,
"regularize": 0.4502807855606079,
"step": 215
},
{
"dpo_loss": 0.551164984703064,
"epoch": 0.6235238545111006,
"grad_norm": 12032.737647695545,
"learning_rate": 4.824441214720629e-06,
"logits": -1.0898358821868896,
"logps": -73.6335678100586,
"loss": 444.6644,
"objective": 445.2054748535156,
"ranking_simple": 0.5333333611488342,
"regularize": 0.4597654938697815,
"step": 220
},
{
"dpo_loss": 0.583297848701477,
"epoch": 0.6376948512045347,
"grad_norm": 9292.574396967713,
"learning_rate": 4.808907097658205e-06,
"logits": -1.052019476890564,
"logps": -75.00895690917969,
"loss": 448.7449,
"objective": 459.1669921875,
"ranking_simple": 0.5416666865348816,
"regularize": 0.4535444974899292,
"step": 225
},
{
"dpo_loss": 0.5654311776161194,
"epoch": 0.6518658478979689,
"grad_norm": 10190.172018933226,
"learning_rate": 4.7927417484495756e-06,
"logits": -0.8342668414115906,
"logps": -75.51798248291016,
"loss": 436.2228,
"objective": 409.9478454589844,
"ranking_simple": 0.6416666507720947,
"regularize": 0.4194689691066742,
"step": 230
},
{
"dpo_loss": 0.6019126772880554,
"epoch": 0.6660368445914029,
"grad_norm": 12000.878566078438,
"learning_rate": 4.7759495865398035e-06,
"logits": -0.64374178647995,
"logps": -77.013916015625,
"loss": 457.7021,
"objective": 454.7564697265625,
"ranking_simple": 0.5958333611488342,
"regularize": 0.4394772946834564,
"step": 235
},
{
"dpo_loss": 0.5635860562324524,
"epoch": 0.680207841284837,
"grad_norm": 11997.018653451396,
"learning_rate": 4.758535202738287e-06,
"logits": -0.8103247284889221,
"logps": -77.16266632080078,
"loss": 448.4921,
"objective": 470.2117004394531,
"ranking_simple": 0.6000000238418579,
"regularize": 0.4310372471809387,
"step": 240
},
{
"dpo_loss": 0.612980306148529,
"epoch": 0.6943788379782712,
"grad_norm": 12037.283885838251,
"learning_rate": 4.740503357963676e-06,
"logits": -0.8841701745986938,
"logps": -77.16488647460938,
"loss": 431.5274,
"objective": 442.6828308105469,
"ranking_simple": 0.5916666388511658,
"regularize": 0.4170995056629181,
"step": 245
},
{
"dpo_loss": 0.5597648024559021,
"epoch": 0.7085498346717053,
"grad_norm": 10567.387466711676,
"learning_rate": 4.721858981942284e-06,
"logits": -0.8271477818489075,
"logps": -75.92268371582031,
"loss": 445.1676,
"objective": 432.1976013183594,
"ranking_simple": 0.6083333492279053,
"regularize": 0.39139440655708313,
"step": 250
},
{
"epoch": 0.7085498346717053,
"eval_dpo_loss": 0.7070175409317017,
"eval_logits": -1.0391225814819336,
"eval_logps": -80.71741485595703,
"eval_loss": 572.16943359375,
"eval_objective": 563.6924438476562,
"eval_ranking_simple": 0.5408902764320374,
"eval_regularize": 0.5829753279685974,
"eval_runtime": 368.3728,
"eval_samples_per_second": 15.718,
"eval_steps_per_second": 1.311,
"step": 250
},
{
"dpo_loss": 0.5912286639213562,
"epoch": 0.7227208313651393,
"grad_norm": 10004.706428299638,
"learning_rate": 4.702607171860354e-06,
"logits": -1.0291627645492554,
"logps": -75.54689025878906,
"loss": 430.9432,
"objective": 432.97802734375,
"ranking_simple": 0.5249999761581421,
"regularize": 0.4280008375644684,
"step": 255
},
{
"dpo_loss": 0.5518169403076172,
"epoch": 0.7368918280585735,
"grad_norm": 10338.762017019879,
"learning_rate": 4.682753190970533e-06,
"logits": -0.9816469550132751,
"logps": -78.38555145263672,
"loss": 441.5456,
"objective": 472.0696716308594,
"ranking_simple": 0.5791666507720947,
"regularize": 0.4596433639526367,
"step": 260
},
{
"dpo_loss": 0.5279027819633484,
"epoch": 0.7510628247520076,
"grad_norm": 9975.569885392504,
"learning_rate": 4.6623024671529555e-06,
"logits": -0.8709200024604797,
"logps": -78.86503601074219,
"loss": 415.3095,
"objective": 424.6243896484375,
"ranking_simple": 0.637499988079071,
"regularize": 0.4246416687965393,
"step": 265
},
{
"dpo_loss": 0.5645706057548523,
"epoch": 0.7652338214454416,
"grad_norm": 10607.895202202406,
"learning_rate": 4.641260591431315e-06,
"logits": -0.8247819542884827,
"logps": -78.76771545410156,
"loss": 434.6699,
"objective": 440.14715576171875,
"ranking_simple": 0.5333333611488342,
"regularize": 0.42400452494621277,
"step": 270
},
{
"dpo_loss": 0.587860107421875,
"epoch": 0.7794048181388757,
"grad_norm": 9977.49094753435,
"learning_rate": 4.61963331644433e-06,
"logits": -0.6680871844291687,
"logps": -77.70354461669922,
"loss": 438.6425,
"objective": 448.8917236328125,
"ranking_simple": 0.5833333134651184,
"regularize": 0.46749448776245117,
"step": 275
},
{
"dpo_loss": 0.5520439147949219,
"epoch": 0.7935758148323099,
"grad_norm": 11654.173000851612,
"learning_rate": 4.597426554873037e-06,
"logits": -0.7018941640853882,
"logps": -78.6235122680664,
"loss": 443.7228,
"objective": 438.81072998046875,
"ranking_simple": 0.5791666507720947,
"regularize": 0.4298190176486969,
"step": 280
},
{
"dpo_loss": 0.5620520710945129,
"epoch": 0.807746811525744,
"grad_norm": 11208.671816444528,
"learning_rate": 4.574646377824316e-06,
"logits": -0.7278221845626831,
"logps": -79.00438690185547,
"loss": 428.4479,
"objective": 430.4336242675781,
"ranking_simple": 0.5375000238418579,
"regularize": 0.413059800863266,
"step": 285
},
{
"dpo_loss": 0.5554956793785095,
"epoch": 0.821917808219178,
"grad_norm": 10809.870000518249,
"learning_rate": 4.551299013171111e-06,
"logits": -0.7191876769065857,
"logps": -77.41136169433594,
"loss": 399.2666,
"objective": 398.6150817871094,
"ranking_simple": 0.5791666507720947,
"regularize": 0.3730964958667755,
"step": 290
},
{
"dpo_loss": 0.5479804873466492,
"epoch": 0.8360888049126122,
"grad_norm": 10163.479021180265,
"learning_rate": 4.5273908438498e-06,
"logits": -0.7566318511962891,
"logps": -78.29537963867188,
"loss": 420.4245,
"objective": 438.2878112792969,
"ranking_simple": 0.5541666746139526,
"regularize": 0.43994590640068054,
"step": 295
},
{
"dpo_loss": 0.564975917339325,
"epoch": 0.8502598016060463,
"grad_norm": 10246.9159310537,
"learning_rate": 4.502928406115152e-06,
"logits": -0.7081549167633057,
"logps": -79.06087493896484,
"loss": 413.9375,
"objective": 419.59674072265625,
"ranking_simple": 0.6291666626930237,
"regularize": 0.3981608748435974,
"step": 300
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.7031394839286804,
"eval_logits": -0.7451677918434143,
"eval_logps": -84.88603210449219,
"eval_loss": 567.0264282226562,
"eval_objective": 558.1202392578125,
"eval_ranking_simple": 0.5398550629615784,
"eval_regularize": 0.573249101638794,
"eval_runtime": 374.6781,
"eval_samples_per_second": 15.453,
"eval_steps_per_second": 1.289,
"step": 300
},
{
"dpo_loss": 0.557949423789978,
"epoch": 0.8644307982994804,
"grad_norm": 10481.083205902773,
"learning_rate": 4.477918387753388e-06,
"logits": -0.6799180507659912,
"logps": -79.46160125732422,
"loss": 416.0437,
"objective": 436.0469970703125,
"ranking_simple": 0.625,
"regularize": 0.3994833528995514,
"step": 305
},
{
"dpo_loss": 0.537316083908081,
"epoch": 0.8786017949929145,
"grad_norm": 10330.127581982748,
"learning_rate": 4.452367626253805e-06,
"logits": -0.6589821577072144,
"logps": -77.94523620605469,
"loss": 412.2728,
"objective": 435.38397216796875,
"ranking_simple": 0.5958333611488342,
"regularize": 0.42988669872283936,
"step": 310
},
{
"dpo_loss": 0.5410233736038208,
"epoch": 0.8927727916863486,
"grad_norm": 9680.592388360066,
"learning_rate": 4.426283106939474e-06,
"logits": -0.5960977077484131,
"logps": -77.44992065429688,
"loss": 407.9562,
"objective": 391.1397705078125,
"ranking_simple": 0.5416666865348816,
"regularize": 0.3800966143608093,
"step": 315
},
{
"dpo_loss": 0.5544202923774719,
"epoch": 0.9069437883797827,
"grad_norm": 10390.175171602317,
"learning_rate": 4.399671961057523e-06,
"logits": -0.4942823052406311,
"logps": -79.33159637451172,
"loss": 418.3425,
"objective": 413.7690124511719,
"ranking_simple": 0.5874999761581421,
"regularize": 0.3666258752346039,
"step": 320
},
{
"dpo_loss": 0.5637380480766296,
"epoch": 0.9211147850732169,
"grad_norm": 9093.12442680975,
"learning_rate": 4.372541463829524e-06,
"logits": -0.5750992298126221,
"logps": -79.70758819580078,
"loss": 400.4521,
"objective": 385.9770202636719,
"ranking_simple": 0.5874999761581421,
"regularize": 0.3547678589820862,
"step": 325
},
{
"dpo_loss": 0.527768075466156,
"epoch": 0.9352857817666509,
"grad_norm": 9514.787690422123,
"learning_rate": 4.3448990324625244e-06,
"logits": -0.5246156454086304,
"logps": -78.73555755615234,
"loss": 404.0078,
"objective": 408.9071960449219,
"ranking_simple": 0.6166666746139526,
"regularize": 0.38706058263778687,
"step": 330
},
{
"dpo_loss": 0.5262110829353333,
"epoch": 0.949456778460085,
"grad_norm": 8108.6927584156865,
"learning_rate": 4.316752224121252e-06,
"logits": -0.5318282246589661,
"logps": -78.84705352783203,
"loss": 395.0854,
"objective": 391.35888671875,
"ranking_simple": 0.625,
"regularize": 0.3775762617588043,
"step": 335
},
{
"dpo_loss": 0.5850500464439392,
"epoch": 0.9636277751535192,
"grad_norm": 9909.017294058862,
"learning_rate": 4.288108733862064e-06,
"logits": -0.5778205394744873,
"logps": -78.09910583496094,
"loss": 425.4681,
"objective": 410.2987365722656,
"ranking_simple": 0.5958333611488342,
"regularize": 0.3491324782371521,
"step": 340
},
{
"dpo_loss": 0.5264328718185425,
"epoch": 0.9777987718469532,
"grad_norm": 10518.367912162912,
"learning_rate": 4.2589763925291924e-06,
"logits": -0.6430075168609619,
"logps": -76.8382797241211,
"loss": 386.2568,
"objective": 371.39697265625,
"ranking_simple": 0.6541666388511658,
"regularize": 0.3261619508266449,
"step": 345
},
{
"dpo_loss": 0.5298411846160889,
"epoch": 0.9919697685403873,
"grad_norm": 8822.17526005844,
"learning_rate": 4.229363164613874e-06,
"logits": -0.5401391983032227,
"logps": -78.81169128417969,
"loss": 385.7652,
"objective": 396.1875305175781,
"ranking_simple": 0.6583333611488342,
"regularize": 0.4019112288951874,
"step": 350
},
{
"epoch": 0.9919697685403873,
"eval_dpo_loss": 0.7081769108772278,
"eval_logits": -0.6075623035430908,
"eval_logps": -82.63887786865234,
"eval_loss": 581.0134887695312,
"eval_objective": 565.1652221679688,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 0.5905848741531372,
"eval_runtime": 368.0626,
"eval_samples_per_second": 15.731,
"eval_steps_per_second": 1.312,
"step": 350
},
{
"dpo_loss": 0.5235918760299683,
"epoch": 1.0061407652338215,
"grad_norm": 7985.929619871145,
"learning_rate": 4.199277146076933e-06,
"logits": -0.5844969153404236,
"logps": -77.1658935546875,
"loss": 395.3726,
"objective": 383.5928955078125,
"ranking_simple": 0.6208333373069763,
"regularize": 0.3756142854690552,
"step": 355
},
{
"dpo_loss": 0.5242210030555725,
"epoch": 1.0203117619272555,
"grad_norm": 9633.18732259904,
"learning_rate": 4.168726562135432e-06,
"logits": -0.5656494498252869,
"logps": -78.61763000488281,
"loss": 382.5271,
"objective": 390.3468322753906,
"ranking_simple": 0.5958333611488342,
"regularize": 0.3490845561027527,
"step": 360
},
{
"dpo_loss": 0.5211578011512756,
"epoch": 1.0344827586206897,
"grad_norm": 9644.11378605678,
"learning_rate": 4.137719765013974e-06,
"logits": -0.5837284922599792,
"logps": -78.80403900146484,
"loss": 393.9499,
"objective": 399.89215087890625,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3770846724510193,
"step": 365
},
{
"dpo_loss": 0.5336027145385742,
"epoch": 1.0486537553141237,
"grad_norm": 8251.35503586343,
"learning_rate": 4.106265231661292e-06,
"logits": -0.5281592607498169,
"logps": -76.34611511230469,
"loss": 364.8078,
"objective": 364.66595458984375,
"ranking_simple": 0.5916666388511658,
"regularize": 0.337600976228714,
"step": 370
},
{
"dpo_loss": 0.5174949765205383,
"epoch": 1.0628247520075578,
"grad_norm": 9028.93002381266,
"learning_rate": 4.074371561432731e-06,
"logits": -0.5678179860115051,
"logps": -76.8271255493164,
"loss": 372.633,
"objective": 343.89019775390625,
"ranking_simple": 0.612500011920929,
"regularize": 0.30460023880004883,
"step": 375
},
{
"dpo_loss": 0.5492153167724609,
"epoch": 1.076995748700992,
"grad_norm": 8789.25650294624,
"learning_rate": 4.042047473739278e-06,
"logits": -0.5127583146095276,
"logps": -78.71736907958984,
"loss": 373.876,
"objective": 406.1351013183594,
"ranking_simple": 0.6291666626930237,
"regularize": 0.36652448773384094,
"step": 380
},
{
"dpo_loss": 0.5343239307403564,
"epoch": 1.091166745394426,
"grad_norm": 8695.398130642287,
"learning_rate": 4.009301805663752e-06,
"logits": -0.439236581325531,
"logps": -77.64140319824219,
"loss": 381.8216,
"objective": 398.8446960449219,
"ranking_simple": 0.6000000238418579,
"regularize": 0.3279392123222351,
"step": 385
},
{
"dpo_loss": 0.5177706480026245,
"epoch": 1.10533774208786,
"grad_norm": 9041.130741001136,
"learning_rate": 3.976143509544843e-06,
"logits": -0.4288846254348755,
"logps": -79.35343933105469,
"loss": 357.1327,
"objective": 361.1878967285156,
"ranking_simple": 0.6416666507720947,
"regularize": 0.3373713791370392,
"step": 390
},
{
"dpo_loss": 0.5048284530639648,
"epoch": 1.1195087387812943,
"grad_norm": 8902.672627868116,
"learning_rate": 3.9425816505296254e-06,
"logits": -0.48021775484085083,
"logps": -78.194091796875,
"loss": 394.8973,
"objective": 412.3692932128906,
"ranking_simple": 0.6000000238418579,
"regularize": 0.3603326082229614,
"step": 395
},
{
"dpo_loss": 0.5264464020729065,
"epoch": 1.1336797354747283,
"grad_norm": 8995.938641641998,
"learning_rate": 3.908625404095242e-06,
"logits": -0.4987303912639618,
"logps": -77.11076354980469,
"loss": 376.3251,
"objective": 368.0682373046875,
"ranking_simple": 0.625,
"regularize": 0.31286415457725525,
"step": 400
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.7118101716041565,
"eval_logits": -0.5273135900497437,
"eval_logps": -81.622314453125,
"eval_loss": 586.0215454101562,
"eval_objective": 571.4174194335938,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 0.5996229648590088,
"eval_runtime": 367.4788,
"eval_samples_per_second": 15.756,
"eval_steps_per_second": 1.314,
"step": 400
},
{
"dpo_loss": 0.5221129059791565,
"epoch": 1.1478507321681626,
"grad_norm": 8887.877038015888,
"learning_rate": 3.8742840535404155e-06,
"logits": -0.4772927165031433,
"logps": -78.21456909179688,
"loss": 374.1795,
"objective": 403.9602355957031,
"ranking_simple": 0.5791666507720947,
"regularize": 0.35953524708747864,
"step": 405
},
{
"dpo_loss": 0.5320737361907959,
"epoch": 1.1620217288615966,
"grad_norm": 8653.742076980236,
"learning_rate": 3.839566987447492e-06,
"logits": -0.5261733531951904,
"logps": -77.4281005859375,
"loss": 357.8752,
"objective": 369.8106689453125,
"ranking_simple": 0.6041666865348816,
"regularize": 0.31248220801353455,
"step": 410
},
{
"dpo_loss": 0.5130491852760315,
"epoch": 1.1761927255550306,
"grad_norm": 8220.944462330583,
"learning_rate": 3.8044836971156935e-06,
"logits": -0.4498496651649475,
"logps": -76.78229522705078,
"loss": 367.3453,
"objective": 342.2223815917969,
"ranking_simple": 0.6166666746139526,
"regularize": 0.32919442653656006,
"step": 415
},
{
"dpo_loss": 0.5148084163665771,
"epoch": 1.1903637222484649,
"grad_norm": 8364.696648518799,
"learning_rate": 3.7690437739662928e-06,
"logits": -0.4175103008747101,
"logps": -78.17424011230469,
"loss": 381.6126,
"objective": 368.6730041503906,
"ranking_simple": 0.5874999761581421,
"regularize": 0.34451645612716675,
"step": 420
},
{
"dpo_loss": 0.5163763761520386,
"epoch": 1.204534718941899,
"grad_norm": 8541.974606101103,
"learning_rate": 3.7332569069204127e-06,
"logits": -0.45798221230506897,
"logps": -76.5196304321289,
"loss": 370.1389,
"objective": 370.2024230957031,
"ranking_simple": 0.6166666746139526,
"regularize": 0.3256681561470032,
"step": 425
},
{
"dpo_loss": 0.5450712442398071,
"epoch": 1.2187057156353331,
"grad_norm": 9487.349462942979,
"learning_rate": 3.697132879750174e-06,
"logits": -0.48889076709747314,
"logps": -78.89514923095703,
"loss": 384.2139,
"objective": 399.7441101074219,
"ranking_simple": 0.612500011920929,
"regularize": 0.3587479293346405,
"step": 430
},
{
"dpo_loss": 0.4979787766933441,
"epoch": 1.2328767123287672,
"grad_norm": 9248.918944555557,
"learning_rate": 3.6606815684039098e-06,
"logits": -0.45076984167099,
"logps": -78.12223815917969,
"loss": 376.6027,
"objective": 365.3465881347656,
"ranking_simple": 0.6708333492279053,
"regularize": 0.33380749821662903,
"step": 435
},
{
"dpo_loss": 0.5080611109733582,
"epoch": 1.2470477090222012,
"grad_norm": 8952.956886414895,
"learning_rate": 3.6239129383061764e-06,
"logits": -0.5257605314254761,
"logps": -77.80259704589844,
"loss": 367.3115,
"objective": 355.6669921875,
"ranking_simple": 0.6458333134651184,
"regularize": 0.3250352144241333,
"step": 440
},
{
"dpo_loss": 0.5034739375114441,
"epoch": 1.2612187057156352,
"grad_norm": 8595.813908218257,
"learning_rate": 3.586837041633312e-06,
"logits": -0.5648617148399353,
"logps": -78.69916534423828,
"loss": 353.7886,
"objective": 345.4654235839844,
"ranking_simple": 0.6625000238418579,
"regularize": 0.3045599162578583,
"step": 445
},
{
"dpo_loss": 0.5136024951934814,
"epoch": 1.2753897024090695,
"grad_norm": 8799.318521509487,
"learning_rate": 3.5494640145652647e-06,
"logits": -0.6497453451156616,
"logps": -77.91305541992188,
"loss": 348.4717,
"objective": 341.05877685546875,
"ranking_simple": 0.5833333134651184,
"regularize": 0.2908380925655365,
"step": 450
},
{
"epoch": 1.2753897024090695,
"eval_dpo_loss": 0.7055455446243286,
"eval_logits": -0.6517468094825745,
"eval_logps": -81.88983154296875,
"eval_loss": 576.5939331054688,
"eval_objective": 563.9976806640625,
"eval_ranking_simple": 0.5372670888900757,
"eval_regularize": 0.586566150188446,
"eval_runtime": 372.0863,
"eval_samples_per_second": 15.561,
"eval_steps_per_second": 1.298,
"step": 450
},
{
"dpo_loss": 0.5133717656135559,
"epoch": 1.2895606991025035,
"grad_norm": 8447.712740682113,
"learning_rate": 3.511804074514468e-06,
"logits": -0.50225830078125,
"logps": -78.78018188476562,
"loss": 350.7452,
"objective": 353.5893249511719,
"ranking_simple": 0.5583333373069763,
"regularize": 0.30045947432518005,
"step": 455
},
{
"dpo_loss": 0.5129916071891785,
"epoch": 1.3037316957959377,
"grad_norm": 8683.9771262322,
"learning_rate": 3.4738675173325008e-06,
"logits": -0.5019214153289795,
"logps": -78.0320816040039,
"loss": 358.7841,
"objective": 359.1448669433594,
"ranking_simple": 0.5583333373069763,
"regularize": 0.28918400406837463,
"step": 460
},
{
"dpo_loss": 0.5200445055961609,
"epoch": 1.3179026924893718,
"grad_norm": 8708.589588634271,
"learning_rate": 3.435664714495301e-06,
"logits": -0.4990668296813965,
"logps": -76.45315551757812,
"loss": 363.9146,
"objective": 359.3269348144531,
"ranking_simple": 0.5625,
"regularize": 0.317513108253479,
"step": 465
},
{
"dpo_loss": 0.5191565752029419,
"epoch": 1.3320736891828058,
"grad_norm": 7892.018349995868,
"learning_rate": 3.397206110267713e-06,
"logits": -0.5707500576972961,
"logps": -75.51515197753906,
"loss": 369.7767,
"objective": 370.79046630859375,
"ranking_simple": 0.6083333492279053,
"regularize": 0.30417945981025696,
"step": 470
},
{
"dpo_loss": 0.5160278081893921,
"epoch": 1.34624468587624,
"grad_norm": 8622.437524812693,
"learning_rate": 3.3585022188481247e-06,
"logits": -0.5167524814605713,
"logps": -77.0745849609375,
"loss": 347.9223,
"objective": 363.10107421875,
"ranking_simple": 0.6583333611488342,
"regularize": 0.3366948366165161,
"step": 475
},
{
"dpo_loss": 0.5038079023361206,
"epoch": 1.360415682569674,
"grad_norm": 8520.652876751588,
"learning_rate": 3.3195636214939943e-06,
"logits": -0.5720607042312622,
"logps": -76.88468170166016,
"loss": 352.4882,
"objective": 360.6014709472656,
"ranking_simple": 0.5833333134651184,
"regularize": 0.31744828820228577,
"step": 480
},
{
"dpo_loss": 0.5260137915611267,
"epoch": 1.3745866792631083,
"grad_norm": 8357.649136802573,
"learning_rate": 3.2804009636290403e-06,
"logits": -0.5760036706924438,
"logps": -75.65044403076172,
"loss": 352.4908,
"objective": 335.92327880859375,
"ranking_simple": 0.5874999761581421,
"regularize": 0.27229636907577515,
"step": 485
},
{
"dpo_loss": 0.5316032767295837,
"epoch": 1.3887576759565423,
"grad_norm": 8377.120289104301,
"learning_rate": 3.2410249519328848e-06,
"logits": -0.5220092535018921,
"logps": -78.16001892089844,
"loss": 350.6266,
"objective": 356.1325988769531,
"ranking_simple": 0.5916666388511658,
"regularize": 0.2998295724391937,
"step": 490
},
{
"dpo_loss": 0.5069059729576111,
"epoch": 1.4029286726499763,
"grad_norm": 9790.99603957699,
"learning_rate": 3.201446351413958e-06,
"logits": -0.5315040349960327,
"logps": -78.9278793334961,
"loss": 355.1208,
"objective": 340.3614196777344,
"ranking_simple": 0.5916666388511658,
"regularize": 0.2911123037338257,
"step": 495
},
{
"dpo_loss": 0.5013086199760437,
"epoch": 1.4170996693434104,
"grad_norm": 9096.333510047558,
"learning_rate": 3.1616759824664543e-06,
"logits": -0.47489532828330994,
"logps": -78.39351654052734,
"loss": 351.4185,
"objective": 349.8325500488281,
"ranking_simple": 0.6083333492279053,
"regularize": 0.2930639684200287,
"step": 500
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.7128105759620667,
"eval_logits": -0.5594385862350464,
"eval_logps": -82.85633087158203,
"eval_loss": 584.3820190429688,
"eval_objective": 570.8920288085938,
"eval_ranking_simple": 0.5393374562263489,
"eval_regularize": 0.597196638584137,
"eval_runtime": 367.2146,
"eval_samples_per_second": 15.767,
"eval_steps_per_second": 1.315,
"step": 500
},
{
"dpo_loss": 0.502229630947113,
"epoch": 1.4312706660368446,
"grad_norm": 7569.16735798095,
"learning_rate": 3.121724717912138e-06,
"logits": -0.4940933585166931,
"logps": -79.02289581298828,
"loss": 350.6974,
"objective": 343.7030944824219,
"ranking_simple": 0.6291666626930237,
"regularize": 0.2822100818157196,
"step": 505
},
{
"dpo_loss": 0.5396325588226318,
"epoch": 1.4454416627302786,
"grad_norm": 8970.009407427333,
"learning_rate": 3.081603480027826e-06,
"logits": -0.5547798275947571,
"logps": -79.63969421386719,
"loss": 354.8519,
"objective": 349.9094543457031,
"ranking_simple": 0.6000000238418579,
"regularize": 0.27683576941490173,
"step": 510
},
{
"dpo_loss": 0.5251208543777466,
"epoch": 1.4596126594237129,
"grad_norm": 8332.379052556926,
"learning_rate": 3.04132323755935e-06,
"logits": -0.627532422542572,
"logps": -79.98551940917969,
"loss": 345.0198,
"objective": 359.2087707519531,
"ranking_simple": 0.5416666865348816,
"regularize": 0.2865109145641327,
"step": 515
},
{
"dpo_loss": 0.5008211135864258,
"epoch": 1.473783656117147,
"grad_norm": 8229.897145458208,
"learning_rate": 3.0008950027228035e-06,
"logits": -0.5891799330711365,
"logps": -76.67542266845703,
"loss": 342.313,
"objective": 343.9622802734375,
"ranking_simple": 0.5916666388511658,
"regularize": 0.27687618136405945,
"step": 520
},
{
"dpo_loss": 0.4947444498538971,
"epoch": 1.487954652810581,
"grad_norm": 7702.6950001891755,
"learning_rate": 2.960329828193918e-06,
"logits": -0.5145970582962036,
"logps": -75.67664337158203,
"loss": 332.768,
"objective": 329.7011413574219,
"ranking_simple": 0.6166666746139526,
"regularize": 0.2696382403373718,
"step": 525
},
{
"dpo_loss": 0.5160828828811646,
"epoch": 1.5021256495040152,
"grad_norm": 8386.762596966919,
"learning_rate": 2.9196388040863695e-06,
"logits": -0.616746187210083,
"logps": -78.44214630126953,
"loss": 362.1123,
"objective": 358.41510009765625,
"ranking_simple": 0.5791666507720947,
"regularize": 0.29886895418167114,
"step": 530
},
{
"dpo_loss": 0.495453417301178,
"epoch": 1.5162966461974492,
"grad_norm": 7823.009103949036,
"learning_rate": 2.8788330549198512e-06,
"logits": -0.6062889099121094,
"logps": -77.4250717163086,
"loss": 331.5395,
"objective": 311.8442687988281,
"ranking_simple": 0.6000000238418579,
"regularize": 0.24911071360111237,
"step": 535
},
{
"dpo_loss": 0.5264947414398193,
"epoch": 1.5304676428908834,
"grad_norm": 8960.269885367992,
"learning_rate": 2.8379237365787426e-06,
"logits": -0.5448920130729675,
"logps": -77.27252960205078,
"loss": 334.277,
"objective": 340.4452209472656,
"ranking_simple": 0.6208333373069763,
"regularize": 0.2655259668827057,
"step": 540
},
{
"dpo_loss": 0.5327333211898804,
"epoch": 1.5446386395843175,
"grad_norm": 8003.393273489204,
"learning_rate": 2.7969220332622004e-06,
"logits": -0.6398530602455139,
"logps": -76.85224151611328,
"loss": 331.0988,
"objective": 332.23486328125,
"ranking_simple": 0.6541666388511658,
"regularize": 0.2610936164855957,
"step": 545
},
{
"dpo_loss": 0.5121258497238159,
"epoch": 1.5588096362777515,
"grad_norm": 7510.163531162746,
"learning_rate": 2.7558391544265127e-06,
"logits": -0.6813774704933167,
"logps": -76.5182876586914,
"loss": 326.458,
"objective": 333.58294677734375,
"ranking_simple": 0.5666666626930237,
"regularize": 0.27006784081459045,
"step": 550
},
{
"epoch": 1.5588096362777515,
"eval_dpo_loss": 0.7086328268051147,
"eval_logits": -0.6993709802627563,
"eval_logps": -80.56141662597656,
"eval_loss": 578.3502807617188,
"eval_objective": 565.96826171875,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 0.5877013206481934,
"eval_runtime": 367.856,
"eval_samples_per_second": 15.74,
"eval_steps_per_second": 1.313,
"step": 550
},
{
"dpo_loss": 0.5160035490989685,
"epoch": 1.5729806329711855,
"grad_norm": 8802.921874841602,
"learning_rate": 2.714686331720543e-06,
"logits": -0.5305084586143494,
"logps": -76.84326934814453,
"loss": 342.895,
"objective": 352.5166320800781,
"ranking_simple": 0.612500011920929,
"regularize": 0.26788362860679626,
"step": 555
},
{
"dpo_loss": 0.5147711634635925,
"epoch": 1.5871516296646198,
"grad_norm": 7348.156155946241,
"learning_rate": 2.6734748159151104e-06,
"logits": -0.5309932231903076,
"logps": -76.01145935058594,
"loss": 327.5288,
"objective": 325.1703796386719,
"ranking_simple": 0.5958333611488342,
"regularize": 0.25214916467666626,
"step": 560
},
{
"dpo_loss": 0.5153753161430359,
"epoch": 1.601322626358054,
"grad_norm": 8906.64379732216,
"learning_rate": 2.632215873827142e-06,
"logits": -0.4652445912361145,
"logps": -76.68761444091797,
"loss": 354.5548,
"objective": 364.25689697265625,
"ranking_simple": 0.5333333611488342,
"regularize": 0.28750428557395935,
"step": 565
},
{
"dpo_loss": 0.519721508026123,
"epoch": 1.615493623051488,
"grad_norm": 8653.666075373956,
"learning_rate": 2.5909207852394363e-06,
"logits": -0.5437235236167908,
"logps": -77.414794921875,
"loss": 333.0782,
"objective": 335.1112365722656,
"ranking_simple": 0.6166666746139526,
"regularize": 0.25310030579566956,
"step": 570
},
{
"dpo_loss": 0.5159035325050354,
"epoch": 1.629664619744922,
"grad_norm": 9098.50496014823,
"learning_rate": 2.5496008398168844e-06,
"logits": -0.5154822468757629,
"logps": -74.86051177978516,
"loss": 338.3886,
"objective": 360.5445861816406,
"ranking_simple": 0.6333333253860474,
"regularize": 0.293546199798584,
"step": 575
},
{
"dpo_loss": 0.5085076093673706,
"epoch": 1.643835616438356,
"grad_norm": 7622.784591036375,
"learning_rate": 2.508267334019988e-06,
"logits": -0.5285104513168335,
"logps": -75.09459686279297,
"loss": 326.7957,
"objective": 322.0632019042969,
"ranking_simple": 0.6166666746139526,
"regularize": 0.23723416030406952,
"step": 580
},
{
"dpo_loss": 0.5216612219810486,
"epoch": 1.6580066131317903,
"grad_norm": 8435.901879905807,
"learning_rate": 2.46693156801652e-06,
"logits": -0.43548285961151123,
"logps": -74.49349212646484,
"loss": 324.9342,
"objective": 314.76617431640625,
"ranking_simple": 0.5249999761581421,
"regularize": 0.23195335268974304,
"step": 585
},
{
"dpo_loss": 0.5121405720710754,
"epoch": 1.6721776098252243,
"grad_norm": 8829.097116710429,
"learning_rate": 2.4256048425921693e-06,
"logits": -0.4449107050895691,
"logps": -75.21448516845703,
"loss": 332.8984,
"objective": 336.7486572265625,
"ranking_simple": 0.6208333373069763,
"regularize": 0.25163254141807556,
"step": 590
},
{
"dpo_loss": 0.5095264911651611,
"epoch": 1.6863486065186586,
"grad_norm": 8981.762264761135,
"learning_rate": 2.384298456061023e-06,
"logits": -0.4447081387042999,
"logps": -75.95592498779297,
"loss": 329.941,
"objective": 336.85479736328125,
"ranking_simple": 0.6083333492279053,
"regularize": 0.263884961605072,
"step": 595
},
{
"dpo_loss": 0.5105345845222473,
"epoch": 1.7005196032120926,
"grad_norm": 8799.189125731713,
"learning_rate": 2.3430237011767166e-06,
"logits": -0.5217716693878174,
"logps": -76.75984191894531,
"loss": 329.0151,
"objective": 342.8132019042969,
"ranking_simple": 0.6291666626930237,
"regularize": 0.2702232301235199,
"step": 600
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.7084978222846985,
"eval_logits": -0.593561053276062,
"eval_logps": -80.32785034179688,
"eval_loss": 578.38671875,
"eval_objective": 566.0594482421875,
"eval_ranking_simple": 0.5388198494911194,
"eval_regularize": 0.5912774801254272,
"eval_runtime": 369.5452,
"eval_samples_per_second": 15.668,
"eval_steps_per_second": 1.307,
"step": 600
},
{
"dpo_loss": 0.5166282653808594,
"epoch": 1.7146905999055266,
"grad_norm": 8773.738115986773,
"learning_rate": 2.30179186204511e-06,
"logits": -0.4922519624233246,
"logps": -77.39384460449219,
"loss": 324.8781,
"objective": 324.775390625,
"ranking_simple": 0.5666666626930237,
"regularize": 0.23106712102890015,
"step": 605
},
{
"dpo_loss": 0.5313800573348999,
"epoch": 1.7288615965989607,
"grad_norm": 9468.956357202407,
"learning_rate": 2.2606142110393248e-06,
"logits": -0.5464334487915039,
"logps": -75.34829711914062,
"loss": 329.8079,
"objective": 328.7645568847656,
"ranking_simple": 0.6291666626930237,
"regularize": 0.2314998209476471,
"step": 610
},
{
"dpo_loss": 0.5274057984352112,
"epoch": 1.743032593292395,
"grad_norm": 8788.596472210756,
"learning_rate": 2.2195020057179897e-06,
"logits": -0.5443993210792542,
"logps": -75.97472381591797,
"loss": 328.8101,
"objective": 338.30584716796875,
"ranking_simple": 0.5958333611488342,
"regularize": 0.255048006772995,
"step": 615
},
{
"dpo_loss": 0.5214657187461853,
"epoch": 1.7572035899858292,
"grad_norm": 8592.256473559864,
"learning_rate": 2.1784664857475356e-06,
"logits": -0.5270652174949646,
"logps": -76.55162811279297,
"loss": 325.7626,
"objective": 319.66046142578125,
"ranking_simple": 0.625,
"regularize": 0.23240961134433746,
"step": 620
},
{
"dpo_loss": 0.5136802196502686,
"epoch": 1.7713745866792632,
"grad_norm": 7718.82478571958,
"learning_rate": 2.1375188698293855e-06,
"logits": -0.462724506855011,
"logps": -76.12433624267578,
"loss": 330.2958,
"objective": 325.9873046875,
"ranking_simple": 0.6208333373069763,
"regularize": 0.22590765357017517,
"step": 625
},
{
"dpo_loss": 0.536301851272583,
"epoch": 1.7855455833726972,
"grad_norm": 8761.966130498178,
"learning_rate": 2.096670352632873e-06,
"logits": -0.5007703304290771,
"logps": -77.76058197021484,
"loss": 326.3625,
"objective": 339.7125549316406,
"ranking_simple": 0.5625,
"regularize": 0.23891252279281616,
"step": 630
},
{
"dpo_loss": 0.517907977104187,
"epoch": 1.7997165800661312,
"grad_norm": 7178.908644562367,
"learning_rate": 2.0559321017347286e-06,
"logits": -0.585433840751648,
"logps": -76.77262878417969,
"loss": 319.7427,
"objective": 316.2701110839844,
"ranking_simple": 0.637499988079071,
"regularize": 0.218379944562912,
"step": 635
},
{
"dpo_loss": 0.5069996118545532,
"epoch": 1.8138875767595655,
"grad_norm": 7586.408561007724,
"learning_rate": 2.01531525456598e-06,
"logits": -0.5689796805381775,
"logps": -77.61341094970703,
"loss": 320.7854,
"objective": 324.8812255859375,
"ranking_simple": 0.5874999761581421,
"regularize": 0.2512456178665161,
"step": 640
},
{
"dpo_loss": 0.5200368762016296,
"epoch": 1.8280585734529995,
"grad_norm": 8686.608118182792,
"learning_rate": 1.974830915367086e-06,
"logits": -0.5587595701217651,
"logps": -78.18843078613281,
"loss": 330.6056,
"objective": 326.5403747558594,
"ranking_simple": 0.637499988079071,
"regularize": 0.24110235273838043,
"step": 645
},
{
"dpo_loss": 0.5109516382217407,
"epoch": 1.8422295701464337,
"grad_norm": 9365.891667609872,
"learning_rate": 1.93449015215215e-06,
"logits": -0.5691719055175781,
"logps": -78.31002807617188,
"loss": 333.5158,
"objective": 333.96240234375,
"ranking_simple": 0.612500011920929,
"regularize": 0.23523901402950287,
"step": 650
},
{
"epoch": 1.8422295701464337,
"eval_dpo_loss": 0.7084385752677917,
"eval_logits": -0.596887469291687,
"eval_logps": -81.02249908447266,
"eval_loss": 577.92919921875,
"eval_objective": 565.5914916992188,
"eval_ranking_simple": 0.5393374562263489,
"eval_regularize": 0.5890585780143738,
"eval_runtime": 370.9761,
"eval_samples_per_second": 15.607,
"eval_steps_per_second": 1.302,
"step": 650
},
{
"dpo_loss": 0.5157236456871033,
"epoch": 1.8564005668398678,
"grad_norm": 8570.37706164772,
"learning_rate": 1.8943039936830347e-06,
"logits": -0.5198069214820862,
"logps": -75.62894439697266,
"loss": 321.6153,
"objective": 320.3071594238281,
"ranking_simple": 0.5916666388511658,
"regularize": 0.2329235076904297,
"step": 655
},
{
"dpo_loss": 0.521937906742096,
"epoch": 1.8705715635333018,
"grad_norm": 8345.743884997746,
"learning_rate": 1.8542834264542091e-06,
"logits": -0.5126068592071533,
"logps": -78.36030578613281,
"loss": 328.7253,
"objective": 325.4534606933594,
"ranking_simple": 0.625,
"regularize": 0.2483067512512207,
"step": 660
},
{
"dpo_loss": 0.515285313129425,
"epoch": 1.8847425602267358,
"grad_norm": 7929.985369188011,
"learning_rate": 1.814439391689151e-06,
"logits": -0.5389847159385681,
"logps": -76.56269073486328,
"loss": 319.5206,
"objective": 331.35791015625,
"ranking_simple": 0.6625000238418579,
"regularize": 0.23167690634727478,
"step": 665
},
{
"dpo_loss": 0.5117120146751404,
"epoch": 1.89891355692017,
"grad_norm": 8611.003311273333,
"learning_rate": 1.7747827823491253e-06,
"logits": -0.4842732548713684,
"logps": -75.30670928955078,
"loss": 310.9476,
"objective": 312.0437316894531,
"ranking_simple": 0.5583333373069763,
"regularize": 0.23464351892471313,
"step": 670
},
{
"dpo_loss": 0.5363326668739319,
"epoch": 1.9130845536136043,
"grad_norm": 8390.641065212347,
"learning_rate": 1.7353244401551566e-06,
"logits": -0.4712333679199219,
"logps": -76.18867492675781,
"loss": 317.9931,
"objective": 336.70703125,
"ranking_simple": 0.625,
"regularize": 0.23009681701660156,
"step": 675
},
{
"dpo_loss": 0.5199182033538818,
"epoch": 1.9272555503070383,
"grad_norm": 8581.710628204828,
"learning_rate": 1.6960751526240122e-06,
"logits": -0.49357444047927856,
"logps": -78.31690216064453,
"loss": 315.1092,
"objective": 305.8600769042969,
"ranking_simple": 0.6291666626930237,
"regularize": 0.20979805290699005,
"step": 680
},
{
"dpo_loss": 0.5106812119483948,
"epoch": 1.9414265470004723,
"grad_norm": 8197.15415072432,
"learning_rate": 1.6570456501189996e-06,
"logits": -0.5017139911651611,
"logps": -76.01095581054688,
"loss": 306.0857,
"objective": 298.7364501953125,
"ranking_simple": 0.5333333611488342,
"regularize": 0.20531941950321198,
"step": 685
},
{
"dpo_loss": 0.5228912830352783,
"epoch": 1.9555975436939064,
"grad_norm": 8227.185493128562,
"learning_rate": 1.6182466029163974e-06,
"logits": -0.503932535648346,
"logps": -76.89771270751953,
"loss": 312.1701,
"objective": 309.2964782714844,
"ranking_simple": 0.5874999761581421,
"regularize": 0.1995265930891037,
"step": 690
},
{
"dpo_loss": 0.5291420817375183,
"epoch": 1.9697685403873406,
"grad_norm": 8124.705980309312,
"learning_rate": 1.5796886182883053e-06,
"logits": -0.47076740860939026,
"logps": -77.54142761230469,
"loss": 303.4818,
"objective": 305.0574951171875,
"ranking_simple": 0.5625,
"regularize": 0.16832788288593292,
"step": 695
},
{
"dpo_loss": 0.5130675435066223,
"epoch": 1.9839395370807746,
"grad_norm": 8798.438767811667,
"learning_rate": 1.541382237602721e-06,
"logits": -0.4266551434993744,
"logps": -77.73848724365234,
"loss": 316.2014,
"objective": 308.5932312011719,
"ranking_simple": 0.6041666865348816,
"regularize": 0.20295077562332153,
"step": 700
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.709835410118103,
"eval_logits": -0.5956435203552246,
"eval_logps": -80.5416488647461,
"eval_loss": 577.6038208007812,
"eval_objective": 564.6389770507812,
"eval_ranking_simple": 0.5408902764320374,
"eval_regularize": 0.5856665968894958,
"eval_runtime": 367.2185,
"eval_samples_per_second": 15.767,
"eval_steps_per_second": 1.315,
"step": 700
},
{
"dpo_loss": 0.5043766498565674,
"epoch": 1.9981105337742089,
"grad_norm": 8813.527233337918,
"learning_rate": 1.5033379334416376e-06,
"logits": -0.415615975856781,
"logps": -76.69499969482422,
"loss": 307.2797,
"objective": 299.0835266113281,
"ranking_simple": 0.5833333134651184,
"regularize": 0.1889916956424713,
"step": 705
},
{
"dpo_loss": 0.5075303316116333,
"epoch": 2.012281530467643,
"grad_norm": 8152.029201211714,
"learning_rate": 1.465566106737942e-06,
"logits": -0.5361739993095398,
"logps": -76.06444549560547,
"loss": 296.988,
"objective": 296.4339904785156,
"ranking_simple": 0.6291666626930237,
"regularize": 0.16836042702198029,
"step": 710
},
{
"dpo_loss": 0.499095618724823,
"epoch": 2.026452527161077,
"grad_norm": 7474.705902903634,
"learning_rate": 1.4280770839319073e-06,
"logits": -0.5198894739151001,
"logps": -75.47317504882812,
"loss": 290.1166,
"objective": 297.8743896484375,
"ranking_simple": 0.7041666507720947,
"regularize": 0.2067592293024063,
"step": 715
},
{
"dpo_loss": 0.49675101041793823,
"epoch": 2.040623523854511,
"grad_norm": 7687.909209756474,
"learning_rate": 1.3908811141480408e-06,
"logits": -0.48511701822280884,
"logps": -76.5396957397461,
"loss": 293.1388,
"objective": 296.0254821777344,
"ranking_simple": 0.625,
"regularize": 0.18073533475399017,
"step": 720
},
{
"dpo_loss": 0.5085265636444092,
"epoch": 2.0547945205479454,
"grad_norm": 7150.825567237203,
"learning_rate": 1.353988366393083e-06,
"logits": -0.4982639253139496,
"logps": -79.44617462158203,
"loss": 288.661,
"objective": 303.2115173339844,
"ranking_simple": 0.6166666746139526,
"regularize": 0.1895003318786621,
"step": 725
},
{
"dpo_loss": 0.5071407556533813,
"epoch": 2.0689655172413794,
"grad_norm": 7729.725354104077,
"learning_rate": 1.3174089267758983e-06,
"logits": -0.5406936407089233,
"logps": -77.1041488647461,
"loss": 295.914,
"objective": 279.0093078613281,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1544083058834076,
"step": 730
},
{
"dpo_loss": 0.508653998374939,
"epoch": 2.0831365139348135,
"grad_norm": 7709.562610036098,
"learning_rate": 1.2811527957500344e-06,
"logits": -0.5032610297203064,
"logps": -76.36556243896484,
"loss": 281.1705,
"objective": 274.6349792480469,
"ranking_simple": 0.5625,
"regularize": 0.16756394505500793,
"step": 735
},
{
"dpo_loss": 0.5007545351982117,
"epoch": 2.0973075106282475,
"grad_norm": 7780.159502686646,
"learning_rate": 1.245229885379699e-06,
"logits": -0.5718483328819275,
"logps": -76.52877807617188,
"loss": 287.3269,
"objective": 281.77728271484375,
"ranking_simple": 0.625,
"regularize": 0.15750272572040558,
"step": 740
},
{
"dpo_loss": 0.49083974957466125,
"epoch": 2.1114785073216815,
"grad_norm": 7559.0234640974395,
"learning_rate": 1.2096500166298992e-06,
"logits": -0.5142738223075867,
"logps": -77.08470916748047,
"loss": 288.8493,
"objective": 278.6231384277344,
"ranking_simple": 0.6208333373069763,
"regularize": 0.1696869283914566,
"step": 745
},
{
"dpo_loss": 0.510530412197113,
"epoch": 2.1256495040151155,
"grad_norm": 8108.616262187852,
"learning_rate": 1.1744229166814889e-06,
"logits": -0.5391489267349243,
"logps": -75.9011459350586,
"loss": 295.2996,
"objective": 287.6244201660156,
"ranking_simple": 0.5791666507720947,
"regularize": 0.14458681643009186,
"step": 750
},
{
"epoch": 2.1256495040151155,
"eval_dpo_loss": 0.7108220458030701,
"eval_logits": -0.5878574252128601,
"eval_logps": -81.0738525390625,
"eval_loss": 579.5015258789062,
"eval_objective": 567.8404541015625,
"eval_ranking_simple": 0.5393374562263489,
"eval_regularize": 0.592528223991394,
"eval_runtime": 367.7716,
"eval_samples_per_second": 15.743,
"eval_steps_per_second": 1.313,
"step": 750
},
{
"dpo_loss": 0.5009695291519165,
"epoch": 2.13982050070855,
"grad_norm": 8018.931804650577,
"learning_rate": 1.1395582162718524e-06,
"logits": -0.5374471545219421,
"logps": -78.51016998291016,
"loss": 291.8306,
"objective": 290.9220275878906,
"ranking_simple": 0.5916666388511658,
"regularize": 0.1683264821767807,
"step": 755
},
{
"dpo_loss": 0.5287482738494873,
"epoch": 2.153991497401984,
"grad_norm": 7462.123206327768,
"learning_rate": 1.1050654470619602e-06,
"logits": -0.46891796588897705,
"logps": -75.76030731201172,
"loss": 290.7075,
"objective": 282.1697082519531,
"ranking_simple": 0.6041666865348816,
"regularize": 0.15730994939804077,
"step": 760
},
{
"dpo_loss": 0.5191124081611633,
"epoch": 2.168162494095418,
"grad_norm": 8366.940049042583,
"learning_rate": 1.0709540390305061e-06,
"logits": -0.48605969548225403,
"logps": -76.97061920166016,
"loss": 283.513,
"objective": 276.4500732421875,
"ranking_simple": 0.6333333253860474,
"regularize": 0.14232973754405975,
"step": 765
},
{
"dpo_loss": 0.5144416093826294,
"epoch": 2.182333490788852,
"grad_norm": 8218.822763503342,
"learning_rate": 1.0372333178958462e-06,
"logits": -0.4805113971233368,
"logps": -77.28369140625,
"loss": 295.4732,
"objective": 300.4273376464844,
"ranking_simple": 0.5874999761581421,
"regularize": 0.17838290333747864,
"step": 770
},
{
"dpo_loss": 0.5137962698936462,
"epoch": 2.196504487482286,
"grad_norm": 6441.500828763883,
"learning_rate": 1.0039125025664392e-06,
"logits": -0.5039299130439758,
"logps": -77.55305480957031,
"loss": 277.693,
"objective": 280.0492248535156,
"ranking_simple": 0.6541666388511658,
"regularize": 0.14146603643894196,
"step": 775
},
{
"dpo_loss": 0.5056738257408142,
"epoch": 2.21067548417572,
"grad_norm": 8834.102544429094,
"learning_rate": 9.710007026204896e-07,
"logits": -0.43781086802482605,
"logps": -77.69278717041016,
"loss": 288.3063,
"objective": 297.6618347167969,
"ranking_simple": 0.5249999761581421,
"regularize": 0.16135714948177338,
"step": 780
},
{
"dpo_loss": 0.4925435483455658,
"epoch": 2.2248464808691546,
"grad_norm": 7651.241881609664,
"learning_rate": 9.385069158154805e-07,
"logits": -0.4533029794692993,
"logps": -77.70331573486328,
"loss": 280.9115,
"objective": 270.46942138671875,
"ranking_simple": 0.5833333134651184,
"regularize": 0.14943251013755798,
"step": 785
},
{
"dpo_loss": 0.49593406915664673,
"epoch": 2.2390174775625886,
"grad_norm": 7219.442805293083,
"learning_rate": 9.064400256282757e-07,
"logits": -0.44717687368392944,
"logps": -78.27497863769531,
"loss": 282.4128,
"objective": 267.8057861328125,
"ranking_simple": 0.6333333253860474,
"regularize": 0.15546827018260956,
"step": 790
},
{
"dpo_loss": 0.505749523639679,
"epoch": 2.2531884742560226,
"grad_norm": 7763.536847557715,
"learning_rate": 8.74808798826467e-07,
"logits": -0.4969979226589203,
"logps": -79.33271789550781,
"loss": 281.3165,
"objective": 274.2493896484375,
"ranking_simple": 0.625,
"regularize": 0.1496947556734085,
"step": 795
},
{
"dpo_loss": 0.48660808801651,
"epoch": 2.2673594709494567,
"grad_norm": 7932.089944092327,
"learning_rate": 8.436218830716259e-07,
"logits": -0.5253292322158813,
"logps": -78.79373931884766,
"loss": 290.0791,
"objective": 280.91900634765625,
"ranking_simple": 0.612500011920929,
"regularize": 0.16623292863368988,
"step": 800
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.7088403105735779,
"eval_logits": -0.5885158777236938,
"eval_logps": -81.68885803222656,
"eval_loss": 576.8207397460938,
"eval_objective": 564.8282470703125,
"eval_ranking_simple": 0.5377846956253052,
"eval_regularize": 0.5875974893569946,
"eval_runtime": 367.7198,
"eval_samples_per_second": 15.746,
"eval_steps_per_second": 1.314,
"step": 800
},
{
"dpo_loss": 0.5162143707275391,
"epoch": 2.2815304676428907,
"grad_norm": 7214.841128896386,
"learning_rate": 8.1288780455512e-07,
"logits": -0.5276073217391968,
"logps": -77.3137435913086,
"loss": 278.3046,
"objective": 292.67730712890625,
"ranking_simple": 0.5874999761581421,
"regularize": 0.16379231214523315,
"step": 805
},
{
"dpo_loss": 0.5017233490943909,
"epoch": 2.295701464336325,
"grad_norm": 6869.828749708057,
"learning_rate": 7.826149656671386e-07,
"logits": -0.5925108790397644,
"logps": -76.71666717529297,
"loss": 282.4796,
"objective": 281.0669860839844,
"ranking_simple": 0.675000011920929,
"regularize": 0.1503111720085144,
"step": 810
},
{
"dpo_loss": 0.4981227219104767,
"epoch": 2.309872461029759,
"grad_norm": 7651.540848559703,
"learning_rate": 7.528116426995605e-07,
"logits": -0.5629077553749084,
"logps": -78.19300079345703,
"loss": 273.4494,
"objective": 278.7046203613281,
"ranking_simple": 0.5958333611488342,
"regularize": 0.13803134858608246,
"step": 815
},
{
"dpo_loss": 0.5106547474861145,
"epoch": 2.324043457723193,
"grad_norm": 8303.88053874128,
"learning_rate": 7.234859835833022e-07,
"logits": -0.49964413046836853,
"logps": -77.4349136352539,
"loss": 281.5386,
"objective": 289.7933349609375,
"ranking_simple": 0.5833333134651184,
"regularize": 0.14034216105937958,
"step": 820
},
{
"dpo_loss": 0.5033460259437561,
"epoch": 2.3382144544166272,
"grad_norm": 7465.14408583757,
"learning_rate": 6.94646005660749e-07,
"logits": -0.5037187337875366,
"logps": -76.92729949951172,
"loss": 278.1418,
"objective": 275.7555847167969,
"ranking_simple": 0.637499988079071,
"regularize": 0.14195986092090607,
"step": 825
},
{
"dpo_loss": 0.5009591579437256,
"epoch": 2.3523854511100613,
"grad_norm": 7370.912864973515,
"learning_rate": 6.662995934939007e-07,
"logits": -0.5249782800674438,
"logps": -78.88058471679688,
"loss": 277.6341,
"objective": 290.5874328613281,
"ranking_simple": 0.6541666388511658,
"regularize": 0.15550047159194946,
"step": 830
},
{
"dpo_loss": 0.506280243396759,
"epoch": 2.3665564478034957,
"grad_norm": 7154.510003938923,
"learning_rate": 6.384544967088063e-07,
"logits": -0.5261546969413757,
"logps": -78.27130889892578,
"loss": 283.8206,
"objective": 284.1214294433594,
"ranking_simple": 0.6333333253860474,
"regularize": 0.15022988617420197,
"step": 835
},
{
"dpo_loss": 0.5248311758041382,
"epoch": 2.3807274444969297,
"grad_norm": 7561.035287310657,
"learning_rate": 6.111183278768956e-07,
"logits": -0.47096776962280273,
"logps": -78.47908020019531,
"loss": 281.1299,
"objective": 287.78863525390625,
"ranking_simple": 0.5458333492279053,
"regularize": 0.13501495122909546,
"step": 840
},
{
"dpo_loss": 0.5237764120101929,
"epoch": 2.3948984411903638,
"grad_norm": 9019.208502842846,
"learning_rate": 5.842985604337769e-07,
"logits": -0.524657666683197,
"logps": -79.37838745117188,
"loss": 288.7943,
"objective": 290.1376647949219,
"ranking_simple": 0.6083333492279053,
"regularize": 0.13873761892318726,
"step": 845
},
{
"dpo_loss": 0.5015512704849243,
"epoch": 2.409069437883798,
"grad_norm": 7214.405283580069,
"learning_rate": 5.580025266360764e-07,
"logits": -0.5334345102310181,
"logps": -77.42970275878906,
"loss": 277.1292,
"objective": 274.3782043457031,
"ranking_simple": 0.5916666388511658,
"regularize": 0.13637620210647583,
"step": 850
},
{
"epoch": 2.409069437883798,
"eval_dpo_loss": 0.7109217643737793,
"eval_logits": -0.5770819187164307,
"eval_logps": -81.54353332519531,
"eval_loss": 579.0093994140625,
"eval_objective": 567.1205444335938,
"eval_ranking_simple": 0.5383023023605347,
"eval_regularize": 0.5911442041397095,
"eval_runtime": 370.8205,
"eval_samples_per_second": 15.614,
"eval_steps_per_second": 1.303,
"step": 850
},
{
"dpo_loss": 0.5071312785148621,
"epoch": 2.423240434577232,
"grad_norm": 7272.234405221214,
"learning_rate": 5.322374155568688e-07,
"logits": -0.5134973526000977,
"logps": -76.6072769165039,
"loss": 278.8906,
"objective": 276.31671142578125,
"ranking_simple": 0.6083333492279053,
"regularize": 0.14478901028633118,
"step": 855
},
{
"dpo_loss": 0.5126808881759644,
"epoch": 2.4374114312706663,
"grad_norm": 8279.020308548867,
"learning_rate": 5.070102711202606e-07,
"logits": -0.4904005825519562,
"logps": -78.11277770996094,
"loss": 272.6675,
"objective": 259.9554748535156,
"ranking_simple": 0.6041666865348816,
"regularize": 0.12409182637929916,
"step": 860
},
{
"dpo_loss": 0.5031525492668152,
"epoch": 2.4515824279641003,
"grad_norm": 7810.5501789039035,
"learning_rate": 4.823279901756498e-07,
"logits": -0.5084951519966125,
"logps": -77.30481719970703,
"loss": 278.5447,
"objective": 284.18084716796875,
"ranking_simple": 0.5916666388511658,
"regularize": 0.1416517198085785,
"step": 865
},
{
"dpo_loss": 0.5099405646324158,
"epoch": 2.4657534246575343,
"grad_norm": 7054.3955623341435,
"learning_rate": 4.581973206121948e-07,
"logits": -0.522720456123352,
"logps": -78.04560852050781,
"loss": 278.9006,
"objective": 274.6994934082031,
"ranking_simple": 0.5958333611488342,
"regularize": 0.13992194831371307,
"step": 870
},
{
"dpo_loss": 0.4974448084831238,
"epoch": 2.4799244213509684,
"grad_norm": 7158.801947079291,
"learning_rate": 4.3462485951401126e-07,
"logits": -0.481945663690567,
"logps": -77.63534545898438,
"loss": 265.5646,
"objective": 269.0285949707031,
"ranking_simple": 0.612500011920929,
"regularize": 0.13618159294128418,
"step": 875
},
{
"dpo_loss": 0.516840398311615,
"epoch": 2.4940954180444024,
"grad_norm": 7224.52824602365,
"learning_rate": 4.116170513565942e-07,
"logits": -0.42012926936149597,
"logps": -77.26931762695312,
"loss": 277.9572,
"objective": 277.60955810546875,
"ranking_simple": 0.5708333253860474,
"regularize": 0.12541402876377106,
"step": 880
},
{
"dpo_loss": 0.5121944546699524,
"epoch": 2.5082664147378364,
"grad_norm": 7670.321121071118,
"learning_rate": 3.891801862449629e-07,
"logits": -0.5377725958824158,
"logps": -76.21176147460938,
"loss": 273.7286,
"objective": 278.1051940917969,
"ranking_simple": 0.612500011920929,
"regularize": 0.13135148584842682,
"step": 885
},
{
"dpo_loss": 0.5038707852363586,
"epoch": 2.5224374114312704,
"grad_norm": 7464.3594622623605,
"learning_rate": 3.6732039819400686e-07,
"logits": -0.5120099782943726,
"logps": -75.08890533447266,
"loss": 270.7643,
"objective": 259.2137145996094,
"ranking_simple": 0.6000000238418579,
"regularize": 0.1371425837278366,
"step": 890
},
{
"dpo_loss": 0.5104149580001831,
"epoch": 2.536608408124705,
"grad_norm": 7776.17053954941,
"learning_rate": 3.46043663451511e-07,
"logits": -0.5063762068748474,
"logps": -77.50283813476562,
"loss": 274.0527,
"objective": 279.8547668457031,
"ranking_simple": 0.5708333253860474,
"regularize": 0.13040438294410706,
"step": 895
},
{
"dpo_loss": 0.5273467302322388,
"epoch": 2.550779404818139,
"grad_norm": 7627.369911355359,
"learning_rate": 3.253557988643072e-07,
"logits": -0.49025458097457886,
"logps": -76.16547393798828,
"loss": 271.9766,
"objective": 274.15386962890625,
"ranking_simple": 0.625,
"regularize": 0.13161370158195496,
"step": 900
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.7098731994628906,
"eval_logits": -0.5707982182502747,
"eval_logps": -81.16320037841797,
"eval_loss": 577.3417358398438,
"eval_objective": 565.7183837890625,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 0.5880821347236633,
"eval_runtime": 368.1908,
"eval_samples_per_second": 15.726,
"eval_steps_per_second": 1.312,
"step": 900
},
{
"dpo_loss": 0.512609601020813,
"epoch": 2.564950401511573,
"grad_norm": 6804.885302407535,
"learning_rate": 3.052624602880064e-07,
"logits": -0.5115708112716675,
"logps": -75.81733703613281,
"loss": 265.9078,
"objective": 269.9800109863281,
"ranking_simple": 0.5833333134651184,
"regularize": 0.14800624549388885,
"step": 905
},
{
"dpo_loss": 0.4937320947647095,
"epoch": 2.579121398205007,
"grad_norm": 7198.203187575293,
"learning_rate": 2.8576914104074425e-07,
"logits": -0.48827874660491943,
"logps": -79.18850708007812,
"loss": 265.7443,
"objective": 273.89166259765625,
"ranking_simple": 0.6041666865348816,
"regularize": 0.14111953973770142,
"step": 910
},
{
"dpo_loss": 0.5084269642829895,
"epoch": 2.593292394898441,
"grad_norm": 6942.193456170007,
"learning_rate": 2.6688117040136463e-07,
"logits": -0.5942879915237427,
"logps": -78.21766662597656,
"loss": 278.3137,
"objective": 263.6408386230469,
"ranking_simple": 0.637499988079071,
"regularize": 0.11606747657060623,
"step": 915
},
{
"dpo_loss": 0.498431533575058,
"epoch": 2.6074633915918755,
"grad_norm": 6451.638108152793,
"learning_rate": 2.486037121524448e-07,
"logits": -0.46755722165107727,
"logps": -77.2091293334961,
"loss": 270.5728,
"objective": 287.58538818359375,
"ranking_simple": 0.612500011920929,
"regularize": 0.14371080696582794,
"step": 920
},
{
"dpo_loss": 0.5088561177253723,
"epoch": 2.6216343882853095,
"grad_norm": 8257.41304629772,
"learning_rate": 2.3094176316856982e-07,
"logits": -0.4761093258857727,
"logps": -76.82450866699219,
"loss": 270.0178,
"objective": 261.9400634765625,
"ranking_simple": 0.5708333253860474,
"regularize": 0.11346574872732162,
"step": 925
},
{
"dpo_loss": 0.5108747482299805,
"epoch": 2.6358053849787435,
"grad_norm": 7715.040672522825,
"learning_rate": 2.13900152050239e-07,
"logits": -0.4767756164073944,
"logps": -77.13011932373047,
"loss": 273.9086,
"objective": 285.1964111328125,
"ranking_simple": 0.5666666626930237,
"regularize": 0.13920140266418457,
"step": 930
},
{
"dpo_loss": 0.5189302563667297,
"epoch": 2.6499763816721775,
"grad_norm": 8548.789471220947,
"learning_rate": 1.9748353780377234e-07,
"logits": -0.45983853936195374,
"logps": -76.03173828125,
"loss": 276.8373,
"objective": 269.6080322265625,
"ranking_simple": 0.6583333611488342,
"regularize": 0.12301207333803177,
"step": 935
},
{
"dpo_loss": 0.5017234683036804,
"epoch": 2.6641473783656116,
"grad_norm": 8162.699837992091,
"learning_rate": 1.8169640856758652e-07,
"logits": -0.504283607006073,
"logps": -78.90401458740234,
"loss": 281.2083,
"objective": 285.80694580078125,
"ranking_simple": 0.5958333611488342,
"regularize": 0.1213822215795517,
"step": 940
},
{
"dpo_loss": 0.4903925657272339,
"epoch": 2.678318375059046,
"grad_norm": 7208.828785964443,
"learning_rate": 1.6654308038518057e-07,
"logits": -0.5296005010604858,
"logps": -77.27458953857422,
"loss": 265.3151,
"objective": 269.78106689453125,
"ranking_simple": 0.5833333134651184,
"regularize": 0.12937407195568085,
"step": 945
},
{
"dpo_loss": 0.5223451256752014,
"epoch": 2.69248937175248,
"grad_norm": 7752.613058246486,
"learning_rate": 1.5202769602517514e-07,
"logits": -0.45917627215385437,
"logps": -77.59455871582031,
"loss": 273.4982,
"objective": 260.7156982421875,
"ranking_simple": 0.6333333253860474,
"regularize": 0.10433920472860336,
"step": 950
},
{
"epoch": 2.69248937175248,
"eval_dpo_loss": 0.7103093862533569,
"eval_logits": -0.5680380463600159,
"eval_logps": -81.19538879394531,
"eval_loss": 578.9320678710938,
"eval_objective": 567.1773071289062,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 0.5910264849662781,
"eval_runtime": 367.6922,
"eval_samples_per_second": 15.747,
"eval_steps_per_second": 1.314,
"step": 950
},
{
"dpo_loss": 0.5060604810714722,
"epoch": 2.706660368445914,
"grad_norm": 6751.002791925838,
"learning_rate": 1.381542238487188e-07,
"logits": -0.5122019648551941,
"logps": -77.12061309814453,
"loss": 264.4783,
"objective": 250.47158813476562,
"ranking_simple": 0.5916666388511658,
"regularize": 0.12410003691911697,
"step": 955
},
{
"dpo_loss": 0.5180147886276245,
"epoch": 2.720831365139348,
"grad_norm": 8054.959159664104,
"learning_rate": 1.2492645672457838e-07,
"logits": -0.48304110765457153,
"logps": -77.1063003540039,
"loss": 279.6324,
"objective": 290.03631591796875,
"ranking_simple": 0.6041666865348816,
"regularize": 0.13714757561683655,
"step": 960
},
{
"dpo_loss": 0.49898579716682434,
"epoch": 2.735002361832782,
"grad_norm": 7601.482545856324,
"learning_rate": 1.1234801099220787e-07,
"logits": -0.5196807980537415,
"logps": -77.11388397216797,
"loss": 269.8563,
"objective": 259.89813232421875,
"ranking_simple": 0.6416666507720947,
"regularize": 0.13252395391464233,
"step": 965
},
{
"dpo_loss": 0.51671302318573,
"epoch": 2.7491733585262166,
"grad_norm": 7233.095787852224,
"learning_rate": 1.004223254730749e-07,
"logits": -0.5174197554588318,
"logps": -78.05948638916016,
"loss": 262.7624,
"objective": 268.42169189453125,
"ranking_simple": 0.6333333253860474,
"regularize": 0.13439247012138367,
"step": 970
},
{
"dpo_loss": 0.4989195764064789,
"epoch": 2.7633443552196506,
"grad_norm": 7653.138309833151,
"learning_rate": 8.915266053052374e-08,
"logits": -0.4456302523612976,
"logps": -76.5742416381836,
"loss": 265.2271,
"objective": 260.4195556640625,
"ranking_simple": 0.625,
"regularize": 0.12436621636152267,
"step": 975
},
{
"dpo_loss": 0.5052908658981323,
"epoch": 2.7775153519130846,
"grad_norm": 7354.157364452891,
"learning_rate": 7.854209717842231e-08,
"logits": -0.5343514084815979,
"logps": -76.55020141601562,
"loss": 273.2646,
"objective": 284.3968200683594,
"ranking_simple": 0.6083333492279053,
"regularize": 0.1349634826183319,
"step": 980
},
{
"dpo_loss": 0.5007596015930176,
"epoch": 2.7916863486065187,
"grad_norm": 6932.45863115929,
"learning_rate": 6.859353623884569e-08,
"logits": -0.44778621196746826,
"logps": -74.72814178466797,
"loss": 272.9477,
"objective": 261.4037170410156,
"ranking_simple": 0.5874999761581421,
"regularize": 0.12619872391223907,
"step": 985
},
{
"dpo_loss": 0.50762540102005,
"epoch": 2.8058573452999527,
"grad_norm": 7347.273398591806,
"learning_rate": 5.930969754901844e-08,
"logits": -0.4778214693069458,
"logps": -78.38259887695312,
"loss": 260.7251,
"objective": 262.8490905761719,
"ranking_simple": 0.5791666507720947,
"regularize": 0.11383456736803055,
"step": 990
},
{
"dpo_loss": 0.5177646279335022,
"epoch": 2.820028341993387,
"grad_norm": 8414.372934711379,
"learning_rate": 5.069311921774039e-08,
"logits": -0.5479720830917358,
"logps": -76.77764129638672,
"loss": 273.3689,
"objective": 282.17889404296875,
"ranking_simple": 0.6208333373069763,
"regularize": 0.13415595889091492,
"step": 995
},
{
"dpo_loss": 0.5031678080558777,
"epoch": 2.8341993386868207,
"grad_norm": 6910.266147708002,
"learning_rate": 4.2746156931490756e-08,
"logits": -0.4682956337928772,
"logps": -75.585205078125,
"loss": 265.7935,
"objective": 275.0522155761719,
"ranking_simple": 0.612500011920929,
"regularize": 0.12777787446975708,
"step": 1000
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.709902822971344,
"eval_logits": -0.5704091191291809,
"eval_logps": -81.14698028564453,
"eval_loss": 578.3192138671875,
"eval_objective": 566.560791015625,
"eval_ranking_simple": 0.5367494821548462,
"eval_regularize": 0.5901817083358765,
"eval_runtime": 368.6553,
"eval_samples_per_second": 15.706,
"eval_steps_per_second": 1.31,
"step": 1000
},
{
"dpo_loss": 0.5090419054031372,
"epoch": 2.848370335380255,
"grad_norm": 7970.005574260741,
"learning_rate": 3.547098331040916e-08,
"logits": -0.48795023560523987,
"logps": -76.1548843383789,
"loss": 272.1969,
"objective": 268.9000244140625,
"ranking_simple": 0.6166666746139526,
"regularize": 0.12190086394548416,
"step": 1005
},
{
"dpo_loss": 0.5107741355895996,
"epoch": 2.862541332073689,
"grad_norm": 7994.674781206224,
"learning_rate": 2.8869587314321324e-08,
"logits": -0.5256258845329285,
"logps": -76.37800598144531,
"loss": 282.5812,
"objective": 272.2499084472656,
"ranking_simple": 0.612500011920929,
"regularize": 0.11501624435186386,
"step": 1010
},
{
"dpo_loss": 0.5150614380836487,
"epoch": 2.8767123287671232,
"grad_norm": 7261.730587266494,
"learning_rate": 2.2943773698977935e-08,
"logits": -0.4966468811035156,
"logps": -75.5634994506836,
"loss": 260.7579,
"objective": 261.33038330078125,
"ranking_simple": 0.6458333134651184,
"regularize": 0.11271250247955322,
"step": 1015
},
{
"dpo_loss": 0.508990466594696,
"epoch": 2.8908833254605573,
"grad_norm": 7841.280059779772,
"learning_rate": 1.7695162522652352e-08,
"logits": -0.4532057046890259,
"logps": -77.1956558227539,
"loss": 271.9978,
"objective": 272.5274963378906,
"ranking_simple": 0.5833333134651184,
"regularize": 0.12634117901325226,
"step": 1020
},
{
"dpo_loss": 0.5181344151496887,
"epoch": 2.9050543221539913,
"grad_norm": 7685.424658753134,
"learning_rate": 1.3125188703233815e-08,
"logits": -0.53793865442276,
"logps": -77.12080383300781,
"loss": 268.2244,
"objective": 275.9144287109375,
"ranking_simple": 0.625,
"regularize": 0.12161087244749069,
"step": 1025
},
{
"dpo_loss": 0.5186858773231506,
"epoch": 2.9192253188474258,
"grad_norm": 7664.878619416483,
"learning_rate": 9.235101625932885e-09,
"logits": -0.535256564617157,
"logps": -76.86356353759766,
"loss": 277.9306,
"objective": 276.3569641113281,
"ranking_simple": 0.5541666746139526,
"regularize": 0.13014444708824158,
"step": 1030
},
{
"dpo_loss": 0.5086424350738525,
"epoch": 2.9333963155408598,
"grad_norm": 7176.213599878172,
"learning_rate": 6.025964801714412e-09,
"logits": -0.4943471848964691,
"logps": -77.83872985839844,
"loss": 266.904,
"objective": 264.484375,
"ranking_simple": 0.6208333373069763,
"regularize": 0.11565522104501724,
"step": 1035
},
{
"dpo_loss": 0.5082514882087708,
"epoch": 2.947567312234294,
"grad_norm": 7034.435559100273,
"learning_rate": 3.4986555765434415e-09,
"logits": -0.5228769779205322,
"logps": -77.66551208496094,
"loss": 271.848,
"objective": 274.83355712890625,
"ranking_simple": 0.5666666626930237,
"regularize": 0.11535345017910004,
"step": 1040
},
{
"dpo_loss": 0.5133834481239319,
"epoch": 2.961738308927728,
"grad_norm": 7599.023611459706,
"learning_rate": 1.6538648915270794e-09,
"logits": -0.47324731945991516,
"logps": -79.62892150878906,
"loss": 268.8998,
"objective": 262.8771667480469,
"ranking_simple": 0.6208333373069763,
"regularize": 0.12152813374996185,
"step": 1045
},
{
"dpo_loss": 0.5033511519432068,
"epoch": 2.975909305621162,
"grad_norm": 8022.09759775128,
"learning_rate": 4.920970940180958e-10,
"logits": -0.5222968459129333,
"logps": -75.3882827758789,
"loss": 265.6855,
"objective": 262.70672607421875,
"ranking_simple": 0.6541666388511658,
"regularize": 0.1311034858226776,
"step": 1050
},
{
"epoch": 2.975909305621162,
"eval_dpo_loss": 0.7098360061645508,
"eval_logits": -0.5715492367744446,
"eval_logps": -81.13370513916016,
"eval_loss": 578.1592407226562,
"eval_objective": 566.3953857421875,
"eval_ranking_simple": 0.5362318754196167,
"eval_regularize": 0.5898736715316772,
"eval_runtime": 367.7364,
"eval_samples_per_second": 15.745,
"eval_steps_per_second": 1.313,
"step": 1050
},
{
"dpo_loss": 0.5144506692886353,
"epoch": 2.9900803023145963,
"grad_norm": 7870.107583704036,
"learning_rate": 1.3669799732163314e-11,
"logits": -0.4851941168308258,
"logps": -76.2236099243164,
"loss": 272.1204,
"objective": 281.3744201660156,
"ranking_simple": 0.6166666746139526,
"regularize": 0.13684484362602234,
"step": 1055
},
{
"epoch": 2.992914501653283,
"step": 1056,
"total_flos": 0.0,
"train_loss": 356.3672220056707,
"train_runtime": 34691.2758,
"train_samples_per_second": 4.393,
"train_steps_per_second": 0.03
}
],
"logging_steps": 5,
"max_steps": 1056,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}