{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.992914501653283, "eval_steps": 50, "global_step": 1056, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 26513.418063359753, "learning_rate": 4.716981132075472e-08, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 458.1349, "objective": 431.1807556152344, "ranking_simple": 0.5833333134651184, "regularize": 0.3618059456348419, "step": 1 }, { "dpo_loss": 0.6925470232963562, "epoch": 0.014170996693434105, "grad_norm": 25616.845196611706, "learning_rate": 2.358490566037736e-07, "logits": -1.4298049211502075, "logps": -83.76395416259766, "loss": 481.1315, "objective": 456.7127990722656, "ranking_simple": 0.4895833432674408, "regularize": 0.40281566977500916, "step": 5 }, { "dpo_loss": 0.6908925175666809, "epoch": 0.02834199338686821, "grad_norm": 25634.63923836155, "learning_rate": 4.716981132075472e-07, "logits": -1.3988193273544312, "logps": -84.28076171875, "loss": 465.4449, "objective": 457.4544982910156, "ranking_simple": 0.574999988079071, "regularize": 0.4003960192203522, "step": 10 }, { "dpo_loss": 0.6889466643333435, "epoch": 0.042512990080302314, "grad_norm": 25839.066108054885, "learning_rate": 7.075471698113208e-07, "logits": -1.5292092561721802, "logps": -83.47270202636719, "loss": 471.5689, "objective": 480.5885314941406, "ranking_simple": 0.5666666626930237, "regularize": 0.4446539282798767, "step": 15 }, { "dpo_loss": 0.6812318563461304, "epoch": 0.05668398677373642, "grad_norm": 27814.64921945046, "learning_rate": 9.433962264150944e-07, "logits": -1.368198275566101, "logps": -82.4678726196289, "loss": 477.3994, "objective": 495.29437255859375, "ranking_simple": 0.48750001192092896, "regularize": 0.43586504459381104, "step": 20 }, { "dpo_loss": 0.681931734085083, "epoch": 0.07085498346717052, "grad_norm": 23147.9179925769, "learning_rate": 1.179245283018868e-06, "logits": -1.3939018249511719, "logps": -82.79817962646484, "loss": 465.0164, "objective": 451.0166320800781, "ranking_simple": 0.5708333253860474, "regularize": 0.39363664388656616, "step": 25 }, { "dpo_loss": 0.6791452765464783, "epoch": 0.08502598016060463, "grad_norm": 20393.372937507134, "learning_rate": 1.4150943396226415e-06, "logits": -1.4756665229797363, "logps": -83.33882141113281, "loss": 460.6366, "objective": 463.89178466796875, "ranking_simple": 0.4833333194255829, "regularize": 0.4173465967178345, "step": 30 }, { "dpo_loss": 0.6747857332229614, "epoch": 0.09919697685403873, "grad_norm": 23418.03953630638, "learning_rate": 1.650943396226415e-06, "logits": -1.4623119831085205, "logps": -81.88738250732422, "loss": 469.5609, "objective": 465.93438720703125, "ranking_simple": 0.5375000238418579, "regularize": 0.3794897198677063, "step": 35 }, { "dpo_loss": 0.6748687028884888, "epoch": 0.11336797354747284, "grad_norm": 23935.328029038144, "learning_rate": 1.8867924528301889e-06, "logits": -1.4446643590927124, "logps": -82.75718688964844, "loss": 464.1251, "objective": 463.13604736328125, "ranking_simple": 0.5041666626930237, "regularize": 0.3904489278793335, "step": 40 }, { "dpo_loss": 0.6506677865982056, "epoch": 0.12753897024090693, "grad_norm": 21652.885906992553, "learning_rate": 2.1226415094339624e-06, "logits": -1.4655033349990845, "logps": -82.44393157958984, "loss": 449.3847, "objective": 443.1551818847656, "ranking_simple": 0.5249999761581421, "regularize": 0.39696159958839417, "step": 45 }, { "dpo_loss": 0.6506399512290955, "epoch": 0.14170996693434104, "grad_norm": 21470.261560892, "learning_rate": 2.358490566037736e-06, "logits": -1.4927986860275269, "logps": -86.20177459716797, "loss": 470.2434, "objective": 469.0955810546875, "ranking_simple": 0.5041666626930237, "regularize": 0.4380335211753845, "step": 50 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.6872959136962891, "eval_logits": -1.4968135356903076, "eval_logps": -94.33769226074219, "eval_loss": 491.6701965332031, "eval_objective": 488.937255859375, "eval_ranking_simple": 0.5269151329994202, "eval_regularize": 0.4298084080219269, "eval_runtime": 367.8725, "eval_samples_per_second": 15.739, "eval_steps_per_second": 1.313, "step": 50 }, { "dpo_loss": 0.6506890654563904, "epoch": 0.15588096362777515, "grad_norm": 19949.76098202467, "learning_rate": 2.5943396226415095e-06, "logits": -1.470070719718933, "logps": -87.6925277709961, "loss": 445.3158, "objective": 479.81256103515625, "ranking_simple": 0.5166666507720947, "regularize": 0.3883524239063263, "step": 55 }, { "dpo_loss": 0.644140362739563, "epoch": 0.17005196032120926, "grad_norm": 19296.57503340127, "learning_rate": 2.830188679245283e-06, "logits": -1.3877280950546265, "logps": -84.86709594726562, "loss": 439.9873, "objective": 439.1716613769531, "ranking_simple": 0.5375000238418579, "regularize": 0.4104911684989929, "step": 60 }, { "dpo_loss": 0.6160763502120972, "epoch": 0.18422295701464336, "grad_norm": 19281.315668093266, "learning_rate": 3.0660377358490567e-06, "logits": -1.431338906288147, "logps": -79.60600280761719, "loss": 440.686, "objective": 418.4104309082031, "ranking_simple": 0.5666666626930237, "regularize": 0.36962300539016724, "step": 65 }, { "dpo_loss": 0.6181739568710327, "epoch": 0.19839395370807747, "grad_norm": 18809.239218176448, "learning_rate": 3.30188679245283e-06, "logits": -1.4349617958068848, "logps": -82.71049499511719, "loss": 439.5741, "objective": 443.4054870605469, "ranking_simple": 0.550000011920929, "regularize": 0.4068828225135803, "step": 70 }, { "dpo_loss": 0.6287192702293396, "epoch": 0.21256495040151158, "grad_norm": 20616.517476343855, "learning_rate": 3.5377358490566038e-06, "logits": -1.333349585533142, "logps": -82.84730529785156, "loss": 457.74, "objective": 467.5802307128906, "ranking_simple": 0.5625, "regularize": 0.42490166425704956, "step": 75 }, { "dpo_loss": 0.6181908249855042, "epoch": 0.22673594709494568, "grad_norm": 20195.88685314973, "learning_rate": 3.7735849056603777e-06, "logits": -1.3842099905014038, "logps": -80.95723724365234, "loss": 435.7331, "objective": 441.63299560546875, "ranking_simple": 0.5833333134651184, "regularize": 0.42700326442718506, "step": 80 }, { "dpo_loss": 0.6087173819541931, "epoch": 0.2409069437883798, "grad_norm": 15942.080314795965, "learning_rate": 4.009433962264152e-06, "logits": -1.344446063041687, "logps": -80.02598571777344, "loss": 443.4859, "objective": 464.4291076660156, "ranking_simple": 0.5666666626930237, "regularize": 0.45569831132888794, "step": 85 }, { "dpo_loss": 0.5819770693778992, "epoch": 0.25507794048181387, "grad_norm": 16092.557140676014, "learning_rate": 4.245283018867925e-06, "logits": -1.4104372262954712, "logps": -81.34741973876953, "loss": 449.1501, "objective": 444.44000244140625, "ranking_simple": 0.6041666865348816, "regularize": 0.4178715646266937, "step": 90 }, { "dpo_loss": 0.5937064290046692, "epoch": 0.269248937175248, "grad_norm": 16918.089764873057, "learning_rate": 4.481132075471699e-06, "logits": -1.3456578254699707, "logps": -83.11612701416016, "loss": 458.477, "objective": 447.3981018066406, "ranking_simple": 0.5458333492279053, "regularize": 0.41778501868247986, "step": 95 }, { "dpo_loss": 0.6062127351760864, "epoch": 0.2834199338686821, "grad_norm": 16311.430045391364, "learning_rate": 4.716981132075472e-06, "logits": -1.3139296770095825, "logps": -79.3995132446289, "loss": 444.0833, "objective": 441.1212463378906, "ranking_simple": 0.5625, "regularize": 0.4033554494380951, "step": 100 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6839331984519958, "eval_logits": -1.4344693422317505, "eval_logps": -85.09794616699219, "eval_loss": 519.0431518554688, "eval_objective": 504.62091064453125, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.4692068099975586, "eval_runtime": 368.2123, "eval_samples_per_second": 15.725, "eval_steps_per_second": 1.312, "step": 100 }, { "dpo_loss": 0.6278888583183289, "epoch": 0.2975909305621162, "grad_norm": 16121.92097886621, "learning_rate": 4.952830188679246e-06, "logits": -1.4997217655181885, "logps": -80.34195709228516, "loss": 464.4937, "objective": 497.4685363769531, "ranking_simple": 0.5874999761581421, "regularize": 0.5123757123947144, "step": 105 }, { "dpo_loss": 0.6059397459030151, "epoch": 0.3117619272555503, "grad_norm": 14461.937244168814, "learning_rate": 4.999781286194085e-06, "logits": -1.379300832748413, "logps": -78.7112045288086, "loss": 472.4527, "objective": 461.8623046875, "ranking_simple": 0.5333333611488342, "regularize": 0.47747528553009033, "step": 110 }, { "dpo_loss": 0.6304748058319092, "epoch": 0.32593292394898443, "grad_norm": 15822.122754431652, "learning_rate": 4.998892826944418e-06, "logits": -1.303352952003479, "logps": -76.60855102539062, "loss": 466.8177, "objective": 472.0991516113281, "ranking_simple": 0.5874999761581421, "regularize": 0.48390674591064453, "step": 115 }, { "dpo_loss": 0.5859030485153198, "epoch": 0.3401039206424185, "grad_norm": 14414.401646390268, "learning_rate": 4.997321195347154e-06, "logits": -1.2155264616012573, "logps": -78.24824523925781, "loss": 478.7331, "objective": 465.13909912109375, "ranking_simple": 0.574999988079071, "regularize": 0.44416898488998413, "step": 120 }, { "dpo_loss": 0.5911449790000916, "epoch": 0.35427491733585265, "grad_norm": 12964.656141234129, "learning_rate": 4.9950668210706795e-06, "logits": -1.202903151512146, "logps": -79.2585220336914, "loss": 448.101, "objective": 439.24993896484375, "ranking_simple": 0.5541666746139526, "regularize": 0.4476715922355652, "step": 125 }, { "dpo_loss": 0.6120374202728271, "epoch": 0.3684459140292867, "grad_norm": 15154.178723672056, "learning_rate": 4.992130320438411e-06, "logits": -1.1208935976028442, "logps": -78.75408172607422, "loss": 491.5896, "objective": 508.3560485839844, "ranking_simple": 0.6000000238418579, "regularize": 0.507615864276886, "step": 130 }, { "dpo_loss": 0.5841532349586487, "epoch": 0.3826169107227208, "grad_norm": 13801.6700914244, "learning_rate": 4.988512496260302e-06, "logits": -1.0719252824783325, "logps": -79.74116516113281, "loss": 457.2002, "objective": 470.0576477050781, "ranking_simple": 0.5458333492279053, "regularize": 0.423565536737442, "step": 135 }, { "dpo_loss": 0.6198513507843018, "epoch": 0.39678790741615494, "grad_norm": 15864.353210882817, "learning_rate": 4.984214337613357e-06, "logits": -1.0588831901550293, "logps": -78.39877319335938, "loss": 473.3348, "objective": 488.9702453613281, "ranking_simple": 0.5666666626930237, "regularize": 0.49831974506378174, "step": 140 }, { "dpo_loss": 0.6141570806503296, "epoch": 0.410958904109589, "grad_norm": 13855.012204270928, "learning_rate": 4.979237019571235e-06, "logits": -1.0990999937057495, "logps": -81.36931610107422, "loss": 472.2201, "objective": 467.56060791015625, "ranking_simple": 0.6208333373069763, "regularize": 0.4914819896221161, "step": 145 }, { "dpo_loss": 0.6057680249214172, "epoch": 0.42512990080302315, "grad_norm": 12908.639969406, "learning_rate": 4.97358190288299e-06, "logits": -1.0687533617019653, "logps": -82.37923431396484, "loss": 462.7395, "objective": 476.6890869140625, "ranking_simple": 0.5666666626930237, "regularize": 0.5104095935821533, "step": 150 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6978219151496887, "eval_logits": -1.114176630973816, "eval_logps": -85.36813354492188, "eval_loss": 552.14501953125, "eval_objective": 536.3590698242188, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 0.530274510383606, "eval_runtime": 367.6708, "eval_samples_per_second": 15.748, "eval_steps_per_second": 1.314, "step": 150 }, { "dpo_loss": 0.583582878112793, "epoch": 0.43930089749645723, "grad_norm": 13162.177891801954, "learning_rate": 4.967250533601059e-06, "logits": -1.109771966934204, "logps": -79.27082824707031, "loss": 471.5066, "objective": 491.6888122558594, "ranking_simple": 0.5625, "regularize": 0.5010552406311035, "step": 155 }, { "dpo_loss": 0.5951789021492004, "epoch": 0.45347189418989137, "grad_norm": 13529.150362850014, "learning_rate": 4.9602446426585845e-06, "logits": -0.9521434307098389, "logps": -78.37739562988281, "loss": 462.6479, "objective": 443.32379150390625, "ranking_simple": 0.5625, "regularize": 0.43846720457077026, "step": 160 }, { "dpo_loss": 0.5964730381965637, "epoch": 0.46764289088332545, "grad_norm": 12752.905142046688, "learning_rate": 4.952566145396197e-06, "logits": -0.9862248301506042, "logps": -77.13455963134766, "loss": 478.3803, "objective": 439.1322021484375, "ranking_simple": 0.5916666388511658, "regularize": 0.4469877779483795, "step": 165 }, { "dpo_loss": 0.6038484573364258, "epoch": 0.4818138875767596, "grad_norm": 10136.08564624282, "learning_rate": 4.944217141038379e-06, "logits": -0.9612207412719727, "logps": -76.84504699707031, "loss": 464.1854, "objective": 468.0538635253906, "ranking_simple": 0.5375000238418579, "regularize": 0.48102495074272156, "step": 170 }, { "dpo_loss": 0.6333703994750977, "epoch": 0.49598488427019366, "grad_norm": 12679.513797703847, "learning_rate": 4.935199912119558e-06, "logits": -0.8710211515426636, "logps": -78.75601959228516, "loss": 464.1875, "objective": 483.89111328125, "ranking_simple": 0.5458333492279053, "regularize": 0.514005720615387, "step": 175 }, { "dpo_loss": 0.5934690833091736, "epoch": 0.5101558809636277, "grad_norm": 11848.627128456088, "learning_rate": 4.925516923860083e-06, "logits": -0.8811076879501343, "logps": -79.12806701660156, "loss": 454.5442, "objective": 488.2110900878906, "ranking_simple": 0.5333333611488342, "regularize": 0.4675270617008209, "step": 180 }, { "dpo_loss": 0.5724626183509827, "epoch": 0.5243268776570619, "grad_norm": 11713.664064429433, "learning_rate": 4.9151708234922605e-06, "logits": -0.877926230430603, "logps": -78.4993667602539, "loss": 458.0594, "objective": 457.6112060546875, "ranking_simple": 0.5666666626930237, "regularize": 0.4596666693687439, "step": 185 }, { "dpo_loss": 0.5861265659332275, "epoch": 0.538497874350496, "grad_norm": 11338.428858471121, "learning_rate": 4.904164439536626e-06, "logits": -0.9104651808738708, "logps": -78.65680694580078, "loss": 487.9273, "objective": 496.1139221191406, "ranking_simple": 0.6083333492279053, "regularize": 0.48734599351882935, "step": 190 }, { "dpo_loss": 0.555807888507843, "epoch": 0.5526688710439301, "grad_norm": 12189.271370638136, "learning_rate": 4.8925007810286555e-06, "logits": -0.8752073645591736, "logps": -79.23562622070312, "loss": 470.8679, "objective": 470.63421630859375, "ranking_simple": 0.612500011920929, "regularize": 0.4960786998271942, "step": 195 }, { "dpo_loss": 0.5812997817993164, "epoch": 0.5668398677373642, "grad_norm": 11198.141597437847, "learning_rate": 4.880183036696123e-06, "logits": -0.8242141008377075, "logps": -78.40170288085938, "loss": 445.5849, "objective": 460.94390869140625, "ranking_simple": 0.5541666746139526, "regularize": 0.44002261757850647, "step": 200 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.706484317779541, "eval_logits": -0.8469271063804626, "eval_logps": -81.4329833984375, "eval_loss": 561.5618896484375, "eval_objective": 550.3474731445312, "eval_ranking_simple": 0.533643901348114, "eval_regularize": 0.5524560809135437, "eval_runtime": 367.805, "eval_samples_per_second": 15.742, "eval_steps_per_second": 1.313, "step": 200 }, { "dpo_loss": 0.584477961063385, "epoch": 0.5810108644307983, "grad_norm": 11490.315926548616, "learning_rate": 4.867214574087338e-06, "logits": -0.6800127625465393, "logps": -77.10325622558594, "loss": 450.6568, "objective": 437.6501159667969, "ranking_simple": 0.5874999761581421, "regularize": 0.45991986989974976, "step": 205 }, { "dpo_loss": 0.580778181552887, "epoch": 0.5951818611242324, "grad_norm": 12820.159548843532, "learning_rate": 4.853598938650487e-06, "logits": -0.7779163718223572, "logps": -75.716552734375, "loss": 486.1006, "objective": 472.6957702636719, "ranking_simple": 0.5958333611488342, "regularize": 0.47449955344200134, "step": 210 }, { "dpo_loss": 0.5629280805587769, "epoch": 0.6093528578176665, "grad_norm": 12199.139102166855, "learning_rate": 4.8393398527643495e-06, "logits": -0.9319173097610474, "logps": -74.01848602294922, "loss": 450.7458, "objective": 442.0367736816406, "ranking_simple": 0.5541666746139526, "regularize": 0.4502807855606079, "step": 215 }, { "dpo_loss": 0.551164984703064, "epoch": 0.6235238545111006, "grad_norm": 12032.737647695545, "learning_rate": 4.824441214720629e-06, "logits": -1.0898358821868896, "logps": -73.6335678100586, "loss": 444.6644, "objective": 445.2054748535156, "ranking_simple": 0.5333333611488342, "regularize": 0.4597654938697815, "step": 220 }, { "dpo_loss": 0.583297848701477, "epoch": 0.6376948512045347, "grad_norm": 9292.574396967713, "learning_rate": 4.808907097658205e-06, "logits": -1.052019476890564, "logps": -75.00895690917969, "loss": 448.7449, "objective": 459.1669921875, "ranking_simple": 0.5416666865348816, "regularize": 0.4535444974899292, "step": 225 }, { "dpo_loss": 0.5654311776161194, "epoch": 0.6518658478979689, "grad_norm": 10190.172018933226, "learning_rate": 4.7927417484495756e-06, "logits": -0.8342668414115906, "logps": -75.51798248291016, "loss": 436.2228, "objective": 409.9478454589844, "ranking_simple": 0.6416666507720947, "regularize": 0.4194689691066742, "step": 230 }, { "dpo_loss": 0.6019126772880554, "epoch": 0.6660368445914029, "grad_norm": 12000.878566078438, "learning_rate": 4.7759495865398035e-06, "logits": -0.64374178647995, "logps": -77.013916015625, "loss": 457.7021, "objective": 454.7564697265625, "ranking_simple": 0.5958333611488342, "regularize": 0.4394772946834564, "step": 235 }, { "dpo_loss": 0.5635860562324524, "epoch": 0.680207841284837, "grad_norm": 11997.018653451396, "learning_rate": 4.758535202738287e-06, "logits": -0.8103247284889221, "logps": -77.16266632080078, "loss": 448.4921, "objective": 470.2117004394531, "ranking_simple": 0.6000000238418579, "regularize": 0.4310372471809387, "step": 240 }, { "dpo_loss": 0.612980306148529, "epoch": 0.6943788379782712, "grad_norm": 12037.283885838251, "learning_rate": 4.740503357963676e-06, "logits": -0.8841701745986938, "logps": -77.16488647460938, "loss": 431.5274, "objective": 442.6828308105469, "ranking_simple": 0.5916666388511658, "regularize": 0.4170995056629181, "step": 245 }, { "dpo_loss": 0.5597648024559021, "epoch": 0.7085498346717053, "grad_norm": 10567.387466711676, "learning_rate": 4.721858981942284e-06, "logits": -0.8271477818489075, "logps": -75.92268371582031, "loss": 445.1676, "objective": 432.1976013183594, "ranking_simple": 0.6083333492279053, "regularize": 0.39139440655708313, "step": 250 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.7070175409317017, "eval_logits": -1.0391225814819336, "eval_logps": -80.71741485595703, "eval_loss": 572.16943359375, "eval_objective": 563.6924438476562, "eval_ranking_simple": 0.5408902764320374, "eval_regularize": 0.5829753279685974, "eval_runtime": 368.3728, "eval_samples_per_second": 15.718, "eval_steps_per_second": 1.311, "step": 250 }, { "dpo_loss": 0.5912286639213562, "epoch": 0.7227208313651393, "grad_norm": 10004.706428299638, "learning_rate": 4.702607171860354e-06, "logits": -1.0291627645492554, "logps": -75.54689025878906, "loss": 430.9432, "objective": 432.97802734375, "ranking_simple": 0.5249999761581421, "regularize": 0.4280008375644684, "step": 255 }, { "dpo_loss": 0.5518169403076172, "epoch": 0.7368918280585735, "grad_norm": 10338.762017019879, "learning_rate": 4.682753190970533e-06, "logits": -0.9816469550132751, "logps": -78.38555145263672, "loss": 441.5456, "objective": 472.0696716308594, "ranking_simple": 0.5791666507720947, "regularize": 0.4596433639526367, "step": 260 }, { "dpo_loss": 0.5279027819633484, "epoch": 0.7510628247520076, "grad_norm": 9975.569885392504, "learning_rate": 4.6623024671529555e-06, "logits": -0.8709200024604797, "logps": -78.86503601074219, "loss": 415.3095, "objective": 424.6243896484375, "ranking_simple": 0.637499988079071, "regularize": 0.4246416687965393, "step": 265 }, { "dpo_loss": 0.5645706057548523, "epoch": 0.7652338214454416, "grad_norm": 10607.895202202406, "learning_rate": 4.641260591431315e-06, "logits": -0.8247819542884827, "logps": -78.76771545410156, "loss": 434.6699, "objective": 440.14715576171875, "ranking_simple": 0.5333333611488342, "regularize": 0.42400452494621277, "step": 270 }, { "dpo_loss": 0.587860107421875, "epoch": 0.7794048181388757, "grad_norm": 9977.49094753435, "learning_rate": 4.61963331644433e-06, "logits": -0.6680871844291687, "logps": -77.70354461669922, "loss": 438.6425, "objective": 448.8917236328125, "ranking_simple": 0.5833333134651184, "regularize": 0.46749448776245117, "step": 275 }, { "dpo_loss": 0.5520439147949219, "epoch": 0.7935758148323099, "grad_norm": 11654.173000851612, "learning_rate": 4.597426554873037e-06, "logits": -0.7018941640853882, "logps": -78.6235122680664, "loss": 443.7228, "objective": 438.81072998046875, "ranking_simple": 0.5791666507720947, "regularize": 0.4298190176486969, "step": 280 }, { "dpo_loss": 0.5620520710945129, "epoch": 0.807746811525744, "grad_norm": 11208.671816444528, "learning_rate": 4.574646377824316e-06, "logits": -0.7278221845626831, "logps": -79.00438690185547, "loss": 428.4479, "objective": 430.4336242675781, "ranking_simple": 0.5375000238418579, "regularize": 0.413059800863266, "step": 285 }, { "dpo_loss": 0.5554956793785095, "epoch": 0.821917808219178, "grad_norm": 10809.870000518249, "learning_rate": 4.551299013171111e-06, "logits": -0.7191876769065857, "logps": -77.41136169433594, "loss": 399.2666, "objective": 398.6150817871094, "ranking_simple": 0.5791666507720947, "regularize": 0.3730964958667755, "step": 290 }, { "dpo_loss": 0.5479804873466492, "epoch": 0.8360888049126122, "grad_norm": 10163.479021180265, "learning_rate": 4.5273908438498e-06, "logits": -0.7566318511962891, "logps": -78.29537963867188, "loss": 420.4245, "objective": 438.2878112792969, "ranking_simple": 0.5541666746139526, "regularize": 0.43994590640068054, "step": 295 }, { "dpo_loss": 0.564975917339325, "epoch": 0.8502598016060463, "grad_norm": 10246.9159310537, "learning_rate": 4.502928406115152e-06, "logits": -0.7081549167633057, "logps": -79.06087493896484, "loss": 413.9375, "objective": 419.59674072265625, "ranking_simple": 0.6291666626930237, "regularize": 0.3981608748435974, "step": 300 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.7031394839286804, "eval_logits": -0.7451677918434143, "eval_logps": -84.88603210449219, "eval_loss": 567.0264282226562, "eval_objective": 558.1202392578125, "eval_ranking_simple": 0.5398550629615784, "eval_regularize": 0.573249101638794, "eval_runtime": 374.6781, "eval_samples_per_second": 15.453, "eval_steps_per_second": 1.289, "step": 300 }, { "dpo_loss": 0.557949423789978, "epoch": 0.8644307982994804, "grad_norm": 10481.083205902773, "learning_rate": 4.477918387753388e-06, "logits": -0.6799180507659912, "logps": -79.46160125732422, "loss": 416.0437, "objective": 436.0469970703125, "ranking_simple": 0.625, "regularize": 0.3994833528995514, "step": 305 }, { "dpo_loss": 0.537316083908081, "epoch": 0.8786017949929145, "grad_norm": 10330.127581982748, "learning_rate": 4.452367626253805e-06, "logits": -0.6589821577072144, "logps": -77.94523620605469, "loss": 412.2728, "objective": 435.38397216796875, "ranking_simple": 0.5958333611488342, "regularize": 0.42988669872283936, "step": 310 }, { "dpo_loss": 0.5410233736038208, "epoch": 0.8927727916863486, "grad_norm": 9680.592388360066, "learning_rate": 4.426283106939474e-06, "logits": -0.5960977077484131, "logps": -77.44992065429688, "loss": 407.9562, "objective": 391.1397705078125, "ranking_simple": 0.5416666865348816, "regularize": 0.3800966143608093, "step": 315 }, { "dpo_loss": 0.5544202923774719, "epoch": 0.9069437883797827, "grad_norm": 10390.175171602317, "learning_rate": 4.399671961057523e-06, "logits": -0.4942823052406311, "logps": -79.33159637451172, "loss": 418.3425, "objective": 413.7690124511719, "ranking_simple": 0.5874999761581421, "regularize": 0.3666258752346039, "step": 320 }, { "dpo_loss": 0.5637380480766296, "epoch": 0.9211147850732169, "grad_norm": 9093.12442680975, "learning_rate": 4.372541463829524e-06, "logits": -0.5750992298126221, "logps": -79.70758819580078, "loss": 400.4521, "objective": 385.9770202636719, "ranking_simple": 0.5874999761581421, "regularize": 0.3547678589820862, "step": 325 }, { "dpo_loss": 0.527768075466156, "epoch": 0.9352857817666509, "grad_norm": 9514.787690422123, "learning_rate": 4.3448990324625244e-06, "logits": -0.5246156454086304, "logps": -78.73555755615234, "loss": 404.0078, "objective": 408.9071960449219, "ranking_simple": 0.6166666746139526, "regularize": 0.38706058263778687, "step": 330 }, { "dpo_loss": 0.5262110829353333, "epoch": 0.949456778460085, "grad_norm": 8108.6927584156865, "learning_rate": 4.316752224121252e-06, "logits": -0.5318282246589661, "logps": -78.84705352783203, "loss": 395.0854, "objective": 391.35888671875, "ranking_simple": 0.625, "regularize": 0.3775762617588043, "step": 335 }, { "dpo_loss": 0.5850500464439392, "epoch": 0.9636277751535192, "grad_norm": 9909.017294058862, "learning_rate": 4.288108733862064e-06, "logits": -0.5778205394744873, "logps": -78.09910583496094, "loss": 425.4681, "objective": 410.2987365722656, "ranking_simple": 0.5958333611488342, "regularize": 0.3491324782371521, "step": 340 }, { "dpo_loss": 0.5264328718185425, "epoch": 0.9777987718469532, "grad_norm": 10518.367912162912, "learning_rate": 4.2589763925291924e-06, "logits": -0.6430075168609619, "logps": -76.8382797241211, "loss": 386.2568, "objective": 371.39697265625, "ranking_simple": 0.6541666388511658, "regularize": 0.3261619508266449, "step": 345 }, { "dpo_loss": 0.5298411846160889, "epoch": 0.9919697685403873, "grad_norm": 8822.17526005844, "learning_rate": 4.229363164613874e-06, "logits": -0.5401391983032227, "logps": -78.81169128417969, "loss": 385.7652, "objective": 396.1875305175781, "ranking_simple": 0.6583333611488342, "regularize": 0.4019112288951874, "step": 350 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.7081769108772278, "eval_logits": -0.6075623035430908, "eval_logps": -82.63887786865234, "eval_loss": 581.0134887695312, "eval_objective": 565.1652221679688, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.5905848741531372, "eval_runtime": 368.0626, "eval_samples_per_second": 15.731, "eval_steps_per_second": 1.312, "step": 350 }, { "dpo_loss": 0.5235918760299683, "epoch": 1.0061407652338215, "grad_norm": 7985.929619871145, "learning_rate": 4.199277146076933e-06, "logits": -0.5844969153404236, "logps": -77.1658935546875, "loss": 395.3726, "objective": 383.5928955078125, "ranking_simple": 0.6208333373069763, "regularize": 0.3756142854690552, "step": 355 }, { "dpo_loss": 0.5242210030555725, "epoch": 1.0203117619272555, "grad_norm": 9633.18732259904, "learning_rate": 4.168726562135432e-06, "logits": -0.5656494498252869, "logps": -78.61763000488281, "loss": 382.5271, "objective": 390.3468322753906, "ranking_simple": 0.5958333611488342, "regularize": 0.3490845561027527, "step": 360 }, { "dpo_loss": 0.5211578011512756, "epoch": 1.0344827586206897, "grad_norm": 9644.11378605678, "learning_rate": 4.137719765013974e-06, "logits": -0.5837284922599792, "logps": -78.80403900146484, "loss": 393.9499, "objective": 399.89215087890625, "ranking_simple": 0.5833333134651184, "regularize": 0.3770846724510193, "step": 365 }, { "dpo_loss": 0.5336027145385742, "epoch": 1.0486537553141237, "grad_norm": 8251.35503586343, "learning_rate": 4.106265231661292e-06, "logits": -0.5281592607498169, "logps": -76.34611511230469, "loss": 364.8078, "objective": 364.66595458984375, "ranking_simple": 0.5916666388511658, "regularize": 0.337600976228714, "step": 370 }, { "dpo_loss": 0.5174949765205383, "epoch": 1.0628247520075578, "grad_norm": 9028.93002381266, "learning_rate": 4.074371561432731e-06, "logits": -0.5678179860115051, "logps": -76.8271255493164, "loss": 372.633, "objective": 343.89019775390625, "ranking_simple": 0.612500011920929, "regularize": 0.30460023880004883, "step": 375 }, { "dpo_loss": 0.5492153167724609, "epoch": 1.076995748700992, "grad_norm": 8789.25650294624, "learning_rate": 4.042047473739278e-06, "logits": -0.5127583146095276, "logps": -78.71736907958984, "loss": 373.876, "objective": 406.1351013183594, "ranking_simple": 0.6291666626930237, "regularize": 0.36652448773384094, "step": 380 }, { "dpo_loss": 0.5343239307403564, "epoch": 1.091166745394426, "grad_norm": 8695.398130642287, "learning_rate": 4.009301805663752e-06, "logits": -0.439236581325531, "logps": -77.64140319824219, "loss": 381.8216, "objective": 398.8446960449219, "ranking_simple": 0.6000000238418579, "regularize": 0.3279392123222351, "step": 385 }, { "dpo_loss": 0.5177706480026245, "epoch": 1.10533774208786, "grad_norm": 9041.130741001136, "learning_rate": 3.976143509544843e-06, "logits": -0.4288846254348755, "logps": -79.35343933105469, "loss": 357.1327, "objective": 361.1878967285156, "ranking_simple": 0.6416666507720947, "regularize": 0.3373713791370392, "step": 390 }, { "dpo_loss": 0.5048284530639648, "epoch": 1.1195087387812943, "grad_norm": 8902.672627868116, "learning_rate": 3.9425816505296254e-06, "logits": -0.48021775484085083, "logps": -78.194091796875, "loss": 394.8973, "objective": 412.3692932128906, "ranking_simple": 0.6000000238418579, "regularize": 0.3603326082229614, "step": 395 }, { "dpo_loss": 0.5264464020729065, "epoch": 1.1336797354747283, "grad_norm": 8995.938641641998, "learning_rate": 3.908625404095242e-06, "logits": -0.4987303912639618, "logps": -77.11076354980469, "loss": 376.3251, "objective": 368.0682373046875, "ranking_simple": 0.625, "regularize": 0.31286415457725525, "step": 400 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.7118101716041565, "eval_logits": -0.5273135900497437, "eval_logps": -81.622314453125, "eval_loss": 586.0215454101562, "eval_objective": 571.4174194335938, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 0.5996229648590088, "eval_runtime": 367.4788, "eval_samples_per_second": 15.756, "eval_steps_per_second": 1.314, "step": 400 }, { "dpo_loss": 0.5221129059791565, "epoch": 1.1478507321681626, "grad_norm": 8887.877038015888, "learning_rate": 3.8742840535404155e-06, "logits": -0.4772927165031433, "logps": -78.21456909179688, "loss": 374.1795, "objective": 403.9602355957031, "ranking_simple": 0.5791666507720947, "regularize": 0.35953524708747864, "step": 405 }, { "dpo_loss": 0.5320737361907959, "epoch": 1.1620217288615966, "grad_norm": 8653.742076980236, "learning_rate": 3.839566987447492e-06, "logits": -0.5261733531951904, "logps": -77.4281005859375, "loss": 357.8752, "objective": 369.8106689453125, "ranking_simple": 0.6041666865348816, "regularize": 0.31248220801353455, "step": 410 }, { "dpo_loss": 0.5130491852760315, "epoch": 1.1761927255550306, "grad_norm": 8220.944462330583, "learning_rate": 3.8044836971156935e-06, "logits": -0.4498496651649475, "logps": -76.78229522705078, "loss": 367.3453, "objective": 342.2223815917969, "ranking_simple": 0.6166666746139526, "regularize": 0.32919442653656006, "step": 415 }, { "dpo_loss": 0.5148084163665771, "epoch": 1.1903637222484649, "grad_norm": 8364.696648518799, "learning_rate": 3.7690437739662928e-06, "logits": -0.4175103008747101, "logps": -78.17424011230469, "loss": 381.6126, "objective": 368.6730041503906, "ranking_simple": 0.5874999761581421, "regularize": 0.34451645612716675, "step": 420 }, { "dpo_loss": 0.5163763761520386, "epoch": 1.204534718941899, "grad_norm": 8541.974606101103, "learning_rate": 3.7332569069204127e-06, "logits": -0.45798221230506897, "logps": -76.5196304321289, "loss": 370.1389, "objective": 370.2024230957031, "ranking_simple": 0.6166666746139526, "regularize": 0.3256681561470032, "step": 425 }, { "dpo_loss": 0.5450712442398071, "epoch": 1.2187057156353331, "grad_norm": 9487.349462942979, "learning_rate": 3.697132879750174e-06, "logits": -0.48889076709747314, "logps": -78.89514923095703, "loss": 384.2139, "objective": 399.7441101074219, "ranking_simple": 0.612500011920929, "regularize": 0.3587479293346405, "step": 430 }, { "dpo_loss": 0.4979787766933441, "epoch": 1.2328767123287672, "grad_norm": 9248.918944555557, "learning_rate": 3.6606815684039098e-06, "logits": -0.45076984167099, "logps": -78.12223815917969, "loss": 376.6027, "objective": 365.3465881347656, "ranking_simple": 0.6708333492279053, "regularize": 0.33380749821662903, "step": 435 }, { "dpo_loss": 0.5080611109733582, "epoch": 1.2470477090222012, "grad_norm": 8952.956886414895, "learning_rate": 3.6239129383061764e-06, "logits": -0.5257605314254761, "logps": -77.80259704589844, "loss": 367.3115, "objective": 355.6669921875, "ranking_simple": 0.6458333134651184, "regularize": 0.3250352144241333, "step": 440 }, { "dpo_loss": 0.5034739375114441, "epoch": 1.2612187057156352, "grad_norm": 8595.813908218257, "learning_rate": 3.586837041633312e-06, "logits": -0.5648617148399353, "logps": -78.69916534423828, "loss": 353.7886, "objective": 345.4654235839844, "ranking_simple": 0.6625000238418579, "regularize": 0.3045599162578583, "step": 445 }, { "dpo_loss": 0.5136024951934814, "epoch": 1.2753897024090695, "grad_norm": 8799.318521509487, "learning_rate": 3.5494640145652647e-06, "logits": -0.6497453451156616, "logps": -77.91305541992188, "loss": 348.4717, "objective": 341.05877685546875, "ranking_simple": 0.5833333134651184, "regularize": 0.2908380925655365, "step": 450 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.7055455446243286, "eval_logits": -0.6517468094825745, "eval_logps": -81.88983154296875, "eval_loss": 576.5939331054688, "eval_objective": 563.9976806640625, "eval_ranking_simple": 0.5372670888900757, "eval_regularize": 0.586566150188446, "eval_runtime": 372.0863, "eval_samples_per_second": 15.561, "eval_steps_per_second": 1.298, "step": 450 }, { "dpo_loss": 0.5133717656135559, "epoch": 1.2895606991025035, "grad_norm": 8447.712740682113, "learning_rate": 3.511804074514468e-06, "logits": -0.50225830078125, "logps": -78.78018188476562, "loss": 350.7452, "objective": 353.5893249511719, "ranking_simple": 0.5583333373069763, "regularize": 0.30045947432518005, "step": 455 }, { "dpo_loss": 0.5129916071891785, "epoch": 1.3037316957959377, "grad_norm": 8683.9771262322, "learning_rate": 3.4738675173325008e-06, "logits": -0.5019214153289795, "logps": -78.0320816040039, "loss": 358.7841, "objective": 359.1448669433594, "ranking_simple": 0.5583333373069763, "regularize": 0.28918400406837463, "step": 460 }, { "dpo_loss": 0.5200445055961609, "epoch": 1.3179026924893718, "grad_norm": 8708.589588634271, "learning_rate": 3.435664714495301e-06, "logits": -0.4990668296813965, "logps": -76.45315551757812, "loss": 363.9146, "objective": 359.3269348144531, "ranking_simple": 0.5625, "regularize": 0.317513108253479, "step": 465 }, { "dpo_loss": 0.5191565752029419, "epoch": 1.3320736891828058, "grad_norm": 7892.018349995868, "learning_rate": 3.397206110267713e-06, "logits": -0.5707500576972961, "logps": -75.51515197753906, "loss": 369.7767, "objective": 370.79046630859375, "ranking_simple": 0.6083333492279053, "regularize": 0.30417945981025696, "step": 470 }, { "dpo_loss": 0.5160278081893921, "epoch": 1.34624468587624, "grad_norm": 8622.437524812693, "learning_rate": 3.3585022188481247e-06, "logits": -0.5167524814605713, "logps": -77.0745849609375, "loss": 347.9223, "objective": 363.10107421875, "ranking_simple": 0.6583333611488342, "regularize": 0.3366948366165161, "step": 475 }, { "dpo_loss": 0.5038079023361206, "epoch": 1.360415682569674, "grad_norm": 8520.652876751588, "learning_rate": 3.3195636214939943e-06, "logits": -0.5720607042312622, "logps": -76.88468170166016, "loss": 352.4882, "objective": 360.6014709472656, "ranking_simple": 0.5833333134651184, "regularize": 0.31744828820228577, "step": 480 }, { "dpo_loss": 0.5260137915611267, "epoch": 1.3745866792631083, "grad_norm": 8357.649136802573, "learning_rate": 3.2804009636290403e-06, "logits": -0.5760036706924438, "logps": -75.65044403076172, "loss": 352.4908, "objective": 335.92327880859375, "ranking_simple": 0.5874999761581421, "regularize": 0.27229636907577515, "step": 485 }, { "dpo_loss": 0.5316032767295837, "epoch": 1.3887576759565423, "grad_norm": 8377.120289104301, "learning_rate": 3.2410249519328848e-06, "logits": -0.5220092535018921, "logps": -78.16001892089844, "loss": 350.6266, "objective": 356.1325988769531, "ranking_simple": 0.5916666388511658, "regularize": 0.2998295724391937, "step": 490 }, { "dpo_loss": 0.5069059729576111, "epoch": 1.4029286726499763, "grad_norm": 9790.99603957699, "learning_rate": 3.201446351413958e-06, "logits": -0.5315040349960327, "logps": -78.9278793334961, "loss": 355.1208, "objective": 340.3614196777344, "ranking_simple": 0.5916666388511658, "regularize": 0.2911123037338257, "step": 495 }, { "dpo_loss": 0.5013086199760437, "epoch": 1.4170996693434104, "grad_norm": 9096.333510047558, "learning_rate": 3.1616759824664543e-06, "logits": -0.47489532828330994, "logps": -78.39351654052734, "loss": 351.4185, "objective": 349.8325500488281, "ranking_simple": 0.6083333492279053, "regularize": 0.2930639684200287, "step": 500 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.7128105759620667, "eval_logits": -0.5594385862350464, "eval_logps": -82.85633087158203, "eval_loss": 584.3820190429688, "eval_objective": 570.8920288085938, "eval_ranking_simple": 0.5393374562263489, "eval_regularize": 0.597196638584137, "eval_runtime": 367.2146, "eval_samples_per_second": 15.767, "eval_steps_per_second": 1.315, "step": 500 }, { "dpo_loss": 0.502229630947113, "epoch": 1.4312706660368446, "grad_norm": 7569.16735798095, "learning_rate": 3.121724717912138e-06, "logits": -0.4940933585166931, "logps": -79.02289581298828, "loss": 350.6974, "objective": 343.7030944824219, "ranking_simple": 0.6291666626930237, "regularize": 0.2822100818157196, "step": 505 }, { "dpo_loss": 0.5396325588226318, "epoch": 1.4454416627302786, "grad_norm": 8970.009407427333, "learning_rate": 3.081603480027826e-06, "logits": -0.5547798275947571, "logps": -79.63969421386719, "loss": 354.8519, "objective": 349.9094543457031, "ranking_simple": 0.6000000238418579, "regularize": 0.27683576941490173, "step": 510 }, { "dpo_loss": 0.5251208543777466, "epoch": 1.4596126594237129, "grad_norm": 8332.379052556926, "learning_rate": 3.04132323755935e-06, "logits": -0.627532422542572, "logps": -79.98551940917969, "loss": 345.0198, "objective": 359.2087707519531, "ranking_simple": 0.5416666865348816, "regularize": 0.2865109145641327, "step": 515 }, { "dpo_loss": 0.5008211135864258, "epoch": 1.473783656117147, "grad_norm": 8229.897145458208, "learning_rate": 3.0008950027228035e-06, "logits": -0.5891799330711365, "logps": -76.67542266845703, "loss": 342.313, "objective": 343.9622802734375, "ranking_simple": 0.5916666388511658, "regularize": 0.27687618136405945, "step": 520 }, { "dpo_loss": 0.4947444498538971, "epoch": 1.487954652810581, "grad_norm": 7702.6950001891755, "learning_rate": 2.960329828193918e-06, "logits": -0.5145970582962036, "logps": -75.67664337158203, "loss": 332.768, "objective": 329.7011413574219, "ranking_simple": 0.6166666746139526, "regularize": 0.2696382403373718, "step": 525 }, { "dpo_loss": 0.5160828828811646, "epoch": 1.5021256495040152, "grad_norm": 8386.762596966919, "learning_rate": 2.9196388040863695e-06, "logits": -0.616746187210083, "logps": -78.44214630126953, "loss": 362.1123, "objective": 358.41510009765625, "ranking_simple": 0.5791666507720947, "regularize": 0.29886895418167114, "step": 530 }, { "dpo_loss": 0.495453417301178, "epoch": 1.5162966461974492, "grad_norm": 7823.009103949036, "learning_rate": 2.8788330549198512e-06, "logits": -0.6062889099121094, "logps": -77.4250717163086, "loss": 331.5395, "objective": 311.8442687988281, "ranking_simple": 0.6000000238418579, "regularize": 0.24911071360111237, "step": 535 }, { "dpo_loss": 0.5264947414398193, "epoch": 1.5304676428908834, "grad_norm": 8960.269885367992, "learning_rate": 2.8379237365787426e-06, "logits": -0.5448920130729675, "logps": -77.27252960205078, "loss": 334.277, "objective": 340.4452209472656, "ranking_simple": 0.6208333373069763, "regularize": 0.2655259668827057, "step": 540 }, { "dpo_loss": 0.5327333211898804, "epoch": 1.5446386395843175, "grad_norm": 8003.393273489204, "learning_rate": 2.7969220332622004e-06, "logits": -0.6398530602455139, "logps": -76.85224151611328, "loss": 331.0988, "objective": 332.23486328125, "ranking_simple": 0.6541666388511658, "regularize": 0.2610936164855957, "step": 545 }, { "dpo_loss": 0.5121258497238159, "epoch": 1.5588096362777515, "grad_norm": 7510.163531162746, "learning_rate": 2.7558391544265127e-06, "logits": -0.6813774704933167, "logps": -76.5182876586914, "loss": 326.458, "objective": 333.58294677734375, "ranking_simple": 0.5666666626930237, "regularize": 0.27006784081459045, "step": 550 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.7086328268051147, "eval_logits": -0.6993709802627563, "eval_logps": -80.56141662597656, "eval_loss": 578.3502807617188, "eval_objective": 565.96826171875, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 0.5877013206481934, "eval_runtime": 367.856, "eval_samples_per_second": 15.74, "eval_steps_per_second": 1.313, "step": 550 }, { "dpo_loss": 0.5160035490989685, "epoch": 1.5729806329711855, "grad_norm": 8802.921874841602, "learning_rate": 2.714686331720543e-06, "logits": -0.5305084586143494, "logps": -76.84326934814453, "loss": 342.895, "objective": 352.5166320800781, "ranking_simple": 0.612500011920929, "regularize": 0.26788362860679626, "step": 555 }, { "dpo_loss": 0.5147711634635925, "epoch": 1.5871516296646198, "grad_norm": 7348.156155946241, "learning_rate": 2.6734748159151104e-06, "logits": -0.5309932231903076, "logps": -76.01145935058594, "loss": 327.5288, "objective": 325.1703796386719, "ranking_simple": 0.5958333611488342, "regularize": 0.25214916467666626, "step": 560 }, { "dpo_loss": 0.5153753161430359, "epoch": 1.601322626358054, "grad_norm": 8906.64379732216, "learning_rate": 2.632215873827142e-06, "logits": -0.4652445912361145, "logps": -76.68761444091797, "loss": 354.5548, "objective": 364.25689697265625, "ranking_simple": 0.5333333611488342, "regularize": 0.28750428557395935, "step": 565 }, { "dpo_loss": 0.519721508026123, "epoch": 1.615493623051488, "grad_norm": 8653.666075373956, "learning_rate": 2.5909207852394363e-06, "logits": -0.5437235236167908, "logps": -77.414794921875, "loss": 333.0782, "objective": 335.1112365722656, "ranking_simple": 0.6166666746139526, "regularize": 0.25310030579566956, "step": 570 }, { "dpo_loss": 0.5159035325050354, "epoch": 1.629664619744922, "grad_norm": 9098.50496014823, "learning_rate": 2.5496008398168844e-06, "logits": -0.5154822468757629, "logps": -74.86051177978516, "loss": 338.3886, "objective": 360.5445861816406, "ranking_simple": 0.6333333253860474, "regularize": 0.293546199798584, "step": 575 }, { "dpo_loss": 0.5085076093673706, "epoch": 1.643835616438356, "grad_norm": 7622.784591036375, "learning_rate": 2.508267334019988e-06, "logits": -0.5285104513168335, "logps": -75.09459686279297, "loss": 326.7957, "objective": 322.0632019042969, "ranking_simple": 0.6166666746139526, "regularize": 0.23723416030406952, "step": 580 }, { "dpo_loss": 0.5216612219810486, "epoch": 1.6580066131317903, "grad_norm": 8435.901879905807, "learning_rate": 2.46693156801652e-06, "logits": -0.43548285961151123, "logps": -74.49349212646484, "loss": 324.9342, "objective": 314.76617431640625, "ranking_simple": 0.5249999761581421, "regularize": 0.23195335268974304, "step": 585 }, { "dpo_loss": 0.5121405720710754, "epoch": 1.6721776098252243, "grad_norm": 8829.097116710429, "learning_rate": 2.4256048425921693e-06, "logits": -0.4449107050895691, "logps": -75.21448516845703, "loss": 332.8984, "objective": 336.7486572265625, "ranking_simple": 0.6208333373069763, "regularize": 0.25163254141807556, "step": 590 }, { "dpo_loss": 0.5095264911651611, "epoch": 1.6863486065186586, "grad_norm": 8981.762264761135, "learning_rate": 2.384298456061023e-06, "logits": -0.4447081387042999, "logps": -75.95592498779297, "loss": 329.941, "objective": 336.85479736328125, "ranking_simple": 0.6083333492279053, "regularize": 0.263884961605072, "step": 595 }, { "dpo_loss": 0.5105345845222473, "epoch": 1.7005196032120926, "grad_norm": 8799.189125731713, "learning_rate": 2.3430237011767166e-06, "logits": -0.5217716693878174, "logps": -76.75984191894531, "loss": 329.0151, "objective": 342.8132019042969, "ranking_simple": 0.6291666626930237, "regularize": 0.2702232301235199, "step": 600 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.7084978222846985, "eval_logits": -0.593561053276062, "eval_logps": -80.32785034179688, "eval_loss": 578.38671875, "eval_objective": 566.0594482421875, "eval_ranking_simple": 0.5388198494911194, "eval_regularize": 0.5912774801254272, "eval_runtime": 369.5452, "eval_samples_per_second": 15.668, "eval_steps_per_second": 1.307, "step": 600 }, { "dpo_loss": 0.5166282653808594, "epoch": 1.7146905999055266, "grad_norm": 8773.738115986773, "learning_rate": 2.30179186204511e-06, "logits": -0.4922519624233246, "logps": -77.39384460449219, "loss": 324.8781, "objective": 324.775390625, "ranking_simple": 0.5666666626930237, "regularize": 0.23106712102890015, "step": 605 }, { "dpo_loss": 0.5313800573348999, "epoch": 1.7288615965989607, "grad_norm": 9468.956357202407, "learning_rate": 2.2606142110393248e-06, "logits": -0.5464334487915039, "logps": -75.34829711914062, "loss": 329.8079, "objective": 328.7645568847656, "ranking_simple": 0.6291666626930237, "regularize": 0.2314998209476471, "step": 610 }, { "dpo_loss": 0.5274057984352112, "epoch": 1.743032593292395, "grad_norm": 8788.596472210756, "learning_rate": 2.2195020057179897e-06, "logits": -0.5443993210792542, "logps": -75.97472381591797, "loss": 328.8101, "objective": 338.30584716796875, "ranking_simple": 0.5958333611488342, "regularize": 0.255048006772995, "step": 615 }, { "dpo_loss": 0.5214657187461853, "epoch": 1.7572035899858292, "grad_norm": 8592.256473559864, "learning_rate": 2.1784664857475356e-06, "logits": -0.5270652174949646, "logps": -76.55162811279297, "loss": 325.7626, "objective": 319.66046142578125, "ranking_simple": 0.625, "regularize": 0.23240961134433746, "step": 620 }, { "dpo_loss": 0.5136802196502686, "epoch": 1.7713745866792632, "grad_norm": 7718.82478571958, "learning_rate": 2.1375188698293855e-06, "logits": -0.462724506855011, "logps": -76.12433624267578, "loss": 330.2958, "objective": 325.9873046875, "ranking_simple": 0.6208333373069763, "regularize": 0.22590765357017517, "step": 625 }, { "dpo_loss": 0.536301851272583, "epoch": 1.7855455833726972, "grad_norm": 8761.966130498178, "learning_rate": 2.096670352632873e-06, "logits": -0.5007703304290771, "logps": -77.76058197021484, "loss": 326.3625, "objective": 339.7125549316406, "ranking_simple": 0.5625, "regularize": 0.23891252279281616, "step": 630 }, { "dpo_loss": 0.517907977104187, "epoch": 1.7997165800661312, "grad_norm": 7178.908644562367, "learning_rate": 2.0559321017347286e-06, "logits": -0.585433840751648, "logps": -76.77262878417969, "loss": 319.7427, "objective": 316.2701110839844, "ranking_simple": 0.637499988079071, "regularize": 0.218379944562912, "step": 635 }, { "dpo_loss": 0.5069996118545532, "epoch": 1.8138875767595655, "grad_norm": 7586.408561007724, "learning_rate": 2.01531525456598e-06, "logits": -0.5689796805381775, "logps": -77.61341094970703, "loss": 320.7854, "objective": 324.8812255859375, "ranking_simple": 0.5874999761581421, "regularize": 0.2512456178665161, "step": 640 }, { "dpo_loss": 0.5200368762016296, "epoch": 1.8280585734529995, "grad_norm": 8686.608118182792, "learning_rate": 1.974830915367086e-06, "logits": -0.5587595701217651, "logps": -78.18843078613281, "loss": 330.6056, "objective": 326.5403747558594, "ranking_simple": 0.637499988079071, "regularize": 0.24110235273838043, "step": 645 }, { "dpo_loss": 0.5109516382217407, "epoch": 1.8422295701464337, "grad_norm": 9365.891667609872, "learning_rate": 1.93449015215215e-06, "logits": -0.5691719055175781, "logps": -78.31002807617188, "loss": 333.5158, "objective": 333.96240234375, "ranking_simple": 0.612500011920929, "regularize": 0.23523901402950287, "step": 650 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.7084385752677917, "eval_logits": -0.596887469291687, "eval_logps": -81.02249908447266, "eval_loss": 577.92919921875, "eval_objective": 565.5914916992188, "eval_ranking_simple": 0.5393374562263489, "eval_regularize": 0.5890585780143738, "eval_runtime": 370.9761, "eval_samples_per_second": 15.607, "eval_steps_per_second": 1.302, "step": 650 }, { "dpo_loss": 0.5157236456871033, "epoch": 1.8564005668398678, "grad_norm": 8570.37706164772, "learning_rate": 1.8943039936830347e-06, "logits": -0.5198069214820862, "logps": -75.62894439697266, "loss": 321.6153, "objective": 320.3071594238281, "ranking_simple": 0.5916666388511658, "regularize": 0.2329235076904297, "step": 655 }, { "dpo_loss": 0.521937906742096, "epoch": 1.8705715635333018, "grad_norm": 8345.743884997746, "learning_rate": 1.8542834264542091e-06, "logits": -0.5126068592071533, "logps": -78.36030578613281, "loss": 328.7253, "objective": 325.4534606933594, "ranking_simple": 0.625, "regularize": 0.2483067512512207, "step": 660 }, { "dpo_loss": 0.515285313129425, "epoch": 1.8847425602267358, "grad_norm": 7929.985369188011, "learning_rate": 1.814439391689151e-06, "logits": -0.5389847159385681, "logps": -76.56269073486328, "loss": 319.5206, "objective": 331.35791015625, "ranking_simple": 0.6625000238418579, "regularize": 0.23167690634727478, "step": 665 }, { "dpo_loss": 0.5117120146751404, "epoch": 1.89891355692017, "grad_norm": 8611.003311273333, "learning_rate": 1.7747827823491253e-06, "logits": -0.4842732548713684, "logps": -75.30670928955078, "loss": 310.9476, "objective": 312.0437316894531, "ranking_simple": 0.5583333373069763, "regularize": 0.23464351892471313, "step": 670 }, { "dpo_loss": 0.5363326668739319, "epoch": 1.9130845536136043, "grad_norm": 8390.641065212347, "learning_rate": 1.7353244401551566e-06, "logits": -0.4712333679199219, "logps": -76.18867492675781, "loss": 317.9931, "objective": 336.70703125, "ranking_simple": 0.625, "regularize": 0.23009681701660156, "step": 675 }, { "dpo_loss": 0.5199182033538818, "epoch": 1.9272555503070383, "grad_norm": 8581.710628204828, "learning_rate": 1.6960751526240122e-06, "logits": -0.49357444047927856, "logps": -78.31690216064453, "loss": 315.1092, "objective": 305.8600769042969, "ranking_simple": 0.6291666626930237, "regularize": 0.20979805290699005, "step": 680 }, { "dpo_loss": 0.5106812119483948, "epoch": 1.9414265470004723, "grad_norm": 8197.15415072432, "learning_rate": 1.6570456501189996e-06, "logits": -0.5017139911651611, "logps": -76.01095581054688, "loss": 306.0857, "objective": 298.7364501953125, "ranking_simple": 0.5333333611488342, "regularize": 0.20531941950321198, "step": 685 }, { "dpo_loss": 0.5228912830352783, "epoch": 1.9555975436939064, "grad_norm": 8227.185493128562, "learning_rate": 1.6182466029163974e-06, "logits": -0.503932535648346, "logps": -76.89771270751953, "loss": 312.1701, "objective": 309.2964782714844, "ranking_simple": 0.5874999761581421, "regularize": 0.1995265930891037, "step": 690 }, { "dpo_loss": 0.5291420817375183, "epoch": 1.9697685403873406, "grad_norm": 8124.705980309312, "learning_rate": 1.5796886182883053e-06, "logits": -0.47076740860939026, "logps": -77.54142761230469, "loss": 303.4818, "objective": 305.0574951171875, "ranking_simple": 0.5625, "regularize": 0.16832788288593292, "step": 695 }, { "dpo_loss": 0.5130675435066223, "epoch": 1.9839395370807746, "grad_norm": 8798.438767811667, "learning_rate": 1.541382237602721e-06, "logits": -0.4266551434993744, "logps": -77.73848724365234, "loss": 316.2014, "objective": 308.5932312011719, "ranking_simple": 0.6041666865348816, "regularize": 0.20295077562332153, "step": 700 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.709835410118103, "eval_logits": -0.5956435203552246, "eval_logps": -80.5416488647461, "eval_loss": 577.6038208007812, "eval_objective": 564.6389770507812, "eval_ranking_simple": 0.5408902764320374, "eval_regularize": 0.5856665968894958, "eval_runtime": 367.2185, "eval_samples_per_second": 15.767, "eval_steps_per_second": 1.315, "step": 700 }, { "dpo_loss": 0.5043766498565674, "epoch": 1.9981105337742089, "grad_norm": 8813.527233337918, "learning_rate": 1.5033379334416376e-06, "logits": -0.415615975856781, "logps": -76.69499969482422, "loss": 307.2797, "objective": 299.0835266113281, "ranking_simple": 0.5833333134651184, "regularize": 0.1889916956424713, "step": 705 }, { "dpo_loss": 0.5075303316116333, "epoch": 2.012281530467643, "grad_norm": 8152.029201211714, "learning_rate": 1.465566106737942e-06, "logits": -0.5361739993095398, "logps": -76.06444549560547, "loss": 296.988, "objective": 296.4339904785156, "ranking_simple": 0.6291666626930237, "regularize": 0.16836042702198029, "step": 710 }, { "dpo_loss": 0.499095618724823, "epoch": 2.026452527161077, "grad_norm": 7474.705902903634, "learning_rate": 1.4280770839319073e-06, "logits": -0.5198894739151001, "logps": -75.47317504882812, "loss": 290.1166, "objective": 297.8743896484375, "ranking_simple": 0.7041666507720947, "regularize": 0.2067592293024063, "step": 715 }, { "dpo_loss": 0.49675101041793823, "epoch": 2.040623523854511, "grad_norm": 7687.909209756474, "learning_rate": 1.3908811141480408e-06, "logits": -0.48511701822280884, "logps": -76.5396957397461, "loss": 293.1388, "objective": 296.0254821777344, "ranking_simple": 0.625, "regularize": 0.18073533475399017, "step": 720 }, { "dpo_loss": 0.5085265636444092, "epoch": 2.0547945205479454, "grad_norm": 7150.825567237203, "learning_rate": 1.353988366393083e-06, "logits": -0.4982639253139496, "logps": -79.44617462158203, "loss": 288.661, "objective": 303.2115173339844, "ranking_simple": 0.6166666746139526, "regularize": 0.1895003318786621, "step": 725 }, { "dpo_loss": 0.5071407556533813, "epoch": 2.0689655172413794, "grad_norm": 7729.725354104077, "learning_rate": 1.3174089267758983e-06, "logits": -0.5406936407089233, "logps": -77.1041488647461, "loss": 295.914, "objective": 279.0093078613281, "ranking_simple": 0.6041666865348816, "regularize": 0.1544083058834076, "step": 730 }, { "dpo_loss": 0.508653998374939, "epoch": 2.0831365139348135, "grad_norm": 7709.562610036098, "learning_rate": 1.2811527957500344e-06, "logits": -0.5032610297203064, "logps": -76.36556243896484, "loss": 281.1705, "objective": 274.6349792480469, "ranking_simple": 0.5625, "regularize": 0.16756394505500793, "step": 735 }, { "dpo_loss": 0.5007545351982117, "epoch": 2.0973075106282475, "grad_norm": 7780.159502686646, "learning_rate": 1.245229885379699e-06, "logits": -0.5718483328819275, "logps": -76.52877807617188, "loss": 287.3269, "objective": 281.77728271484375, "ranking_simple": 0.625, "regularize": 0.15750272572040558, "step": 740 }, { "dpo_loss": 0.49083974957466125, "epoch": 2.1114785073216815, "grad_norm": 7559.0234640974395, "learning_rate": 1.2096500166298992e-06, "logits": -0.5142738223075867, "logps": -77.08470916748047, "loss": 288.8493, "objective": 278.6231384277344, "ranking_simple": 0.6208333373069763, "regularize": 0.1696869283914566, "step": 745 }, { "dpo_loss": 0.510530412197113, "epoch": 2.1256495040151155, "grad_norm": 8108.616262187852, "learning_rate": 1.1744229166814889e-06, "logits": -0.5391489267349243, "logps": -75.9011459350586, "loss": 295.2996, "objective": 287.6244201660156, "ranking_simple": 0.5791666507720947, "regularize": 0.14458681643009186, "step": 750 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.7108220458030701, "eval_logits": -0.5878574252128601, "eval_logps": -81.0738525390625, "eval_loss": 579.5015258789062, "eval_objective": 567.8404541015625, "eval_ranking_simple": 0.5393374562263489, "eval_regularize": 0.592528223991394, "eval_runtime": 367.7716, "eval_samples_per_second": 15.743, "eval_steps_per_second": 1.313, "step": 750 }, { "dpo_loss": 0.5009695291519165, "epoch": 2.13982050070855, "grad_norm": 8018.931804650577, "learning_rate": 1.1395582162718524e-06, "logits": -0.5374471545219421, "logps": -78.51016998291016, "loss": 291.8306, "objective": 290.9220275878906, "ranking_simple": 0.5916666388511658, "regularize": 0.1683264821767807, "step": 755 }, { "dpo_loss": 0.5287482738494873, "epoch": 2.153991497401984, "grad_norm": 7462.123206327768, "learning_rate": 1.1050654470619602e-06, "logits": -0.46891796588897705, "logps": -75.76030731201172, "loss": 290.7075, "objective": 282.1697082519531, "ranking_simple": 0.6041666865348816, "regularize": 0.15730994939804077, "step": 760 }, { "dpo_loss": 0.5191124081611633, "epoch": 2.168162494095418, "grad_norm": 8366.940049042583, "learning_rate": 1.0709540390305061e-06, "logits": -0.48605969548225403, "logps": -76.97061920166016, "loss": 283.513, "objective": 276.4500732421875, "ranking_simple": 0.6333333253860474, "regularize": 0.14232973754405975, "step": 765 }, { "dpo_loss": 0.5144416093826294, "epoch": 2.182333490788852, "grad_norm": 8218.822763503342, "learning_rate": 1.0372333178958462e-06, "logits": -0.4805113971233368, "logps": -77.28369140625, "loss": 295.4732, "objective": 300.4273376464844, "ranking_simple": 0.5874999761581421, "regularize": 0.17838290333747864, "step": 770 }, { "dpo_loss": 0.5137962698936462, "epoch": 2.196504487482286, "grad_norm": 6441.500828763883, "learning_rate": 1.0039125025664392e-06, "logits": -0.5039299130439758, "logps": -77.55305480957031, "loss": 277.693, "objective": 280.0492248535156, "ranking_simple": 0.6541666388511658, "regularize": 0.14146603643894196, "step": 775 }, { "dpo_loss": 0.5056738257408142, "epoch": 2.21067548417572, "grad_norm": 8834.102544429094, "learning_rate": 9.710007026204896e-07, "logits": -0.43781086802482605, "logps": -77.69278717041016, "loss": 288.3063, "objective": 297.6618347167969, "ranking_simple": 0.5249999761581421, "regularize": 0.16135714948177338, "step": 780 }, { "dpo_loss": 0.4925435483455658, "epoch": 2.2248464808691546, "grad_norm": 7651.241881609664, "learning_rate": 9.385069158154805e-07, "logits": -0.4533029794692993, "logps": -77.70331573486328, "loss": 280.9115, "objective": 270.46942138671875, "ranking_simple": 0.5833333134651184, "regularize": 0.14943251013755798, "step": 785 }, { "dpo_loss": 0.49593406915664673, "epoch": 2.2390174775625886, "grad_norm": 7219.442805293083, "learning_rate": 9.064400256282757e-07, "logits": -0.44717687368392944, "logps": -78.27497863769531, "loss": 282.4128, "objective": 267.8057861328125, "ranking_simple": 0.6333333253860474, "regularize": 0.15546827018260956, "step": 790 }, { "dpo_loss": 0.505749523639679, "epoch": 2.2531884742560226, "grad_norm": 7763.536847557715, "learning_rate": 8.74808798826467e-07, "logits": -0.4969979226589203, "logps": -79.33271789550781, "loss": 281.3165, "objective": 274.2493896484375, "ranking_simple": 0.625, "regularize": 0.1496947556734085, "step": 795 }, { "dpo_loss": 0.48660808801651, "epoch": 2.2673594709494567, "grad_norm": 7932.089944092327, "learning_rate": 8.436218830716259e-07, "logits": -0.5253292322158813, "logps": -78.79373931884766, "loss": 290.0791, "objective": 280.91900634765625, "ranking_simple": 0.612500011920929, "regularize": 0.16623292863368988, "step": 800 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.7088403105735779, "eval_logits": -0.5885158777236938, "eval_logps": -81.68885803222656, "eval_loss": 576.8207397460938, "eval_objective": 564.8282470703125, "eval_ranking_simple": 0.5377846956253052, "eval_regularize": 0.5875974893569946, "eval_runtime": 367.7198, "eval_samples_per_second": 15.746, "eval_steps_per_second": 1.314, "step": 800 }, { "dpo_loss": 0.5162143707275391, "epoch": 2.2815304676428907, "grad_norm": 7214.841128896386, "learning_rate": 8.1288780455512e-07, "logits": -0.5276073217391968, "logps": -77.3137435913086, "loss": 278.3046, "objective": 292.67730712890625, "ranking_simple": 0.5874999761581421, "regularize": 0.16379231214523315, "step": 805 }, { "dpo_loss": 0.5017233490943909, "epoch": 2.295701464336325, "grad_norm": 6869.828749708057, "learning_rate": 7.826149656671386e-07, "logits": -0.5925108790397644, "logps": -76.71666717529297, "loss": 282.4796, "objective": 281.0669860839844, "ranking_simple": 0.675000011920929, "regularize": 0.1503111720085144, "step": 810 }, { "dpo_loss": 0.4981227219104767, "epoch": 2.309872461029759, "grad_norm": 7651.540848559703, "learning_rate": 7.528116426995605e-07, "logits": -0.5629077553749084, "logps": -78.19300079345703, "loss": 273.4494, "objective": 278.7046203613281, "ranking_simple": 0.5958333611488342, "regularize": 0.13803134858608246, "step": 815 }, { "dpo_loss": 0.5106547474861145, "epoch": 2.324043457723193, "grad_norm": 8303.88053874128, "learning_rate": 7.234859835833022e-07, "logits": -0.49964413046836853, "logps": -77.4349136352539, "loss": 281.5386, "objective": 289.7933349609375, "ranking_simple": 0.5833333134651184, "regularize": 0.14034216105937958, "step": 820 }, { "dpo_loss": 0.5033460259437561, "epoch": 2.3382144544166272, "grad_norm": 7465.14408583757, "learning_rate": 6.94646005660749e-07, "logits": -0.5037187337875366, "logps": -76.92729949951172, "loss": 278.1418, "objective": 275.7555847167969, "ranking_simple": 0.637499988079071, "regularize": 0.14195986092090607, "step": 825 }, { "dpo_loss": 0.5009591579437256, "epoch": 2.3523854511100613, "grad_norm": 7370.912864973515, "learning_rate": 6.662995934939007e-07, "logits": -0.5249782800674438, "logps": -78.88058471679688, "loss": 277.6341, "objective": 290.5874328613281, "ranking_simple": 0.6541666388511658, "regularize": 0.15550047159194946, "step": 830 }, { "dpo_loss": 0.506280243396759, "epoch": 2.3665564478034957, "grad_norm": 7154.510003938923, "learning_rate": 6.384544967088063e-07, "logits": -0.5261546969413757, "logps": -78.27130889892578, "loss": 283.8206, "objective": 284.1214294433594, "ranking_simple": 0.6333333253860474, "regularize": 0.15022988617420197, "step": 835 }, { "dpo_loss": 0.5248311758041382, "epoch": 2.3807274444969297, "grad_norm": 7561.035287310657, "learning_rate": 6.111183278768956e-07, "logits": -0.47096776962280273, "logps": -78.47908020019531, "loss": 281.1299, "objective": 287.78863525390625, "ranking_simple": 0.5458333492279053, "regularize": 0.13501495122909546, "step": 840 }, { "dpo_loss": 0.5237764120101929, "epoch": 2.3948984411903638, "grad_norm": 9019.208502842846, "learning_rate": 5.842985604337769e-07, "logits": -0.524657666683197, "logps": -79.37838745117188, "loss": 288.7943, "objective": 290.1376647949219, "ranking_simple": 0.6083333492279053, "regularize": 0.13873761892318726, "step": 845 }, { "dpo_loss": 0.5015512704849243, "epoch": 2.409069437883798, "grad_norm": 7214.405283580069, "learning_rate": 5.580025266360764e-07, "logits": -0.5334345102310181, "logps": -77.42970275878906, "loss": 277.1292, "objective": 274.3782043457031, "ranking_simple": 0.5916666388511658, "regularize": 0.13637620210647583, "step": 850 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.7109217643737793, "eval_logits": -0.5770819187164307, "eval_logps": -81.54353332519531, "eval_loss": 579.0093994140625, "eval_objective": 567.1205444335938, "eval_ranking_simple": 0.5383023023605347, "eval_regularize": 0.5911442041397095, "eval_runtime": 370.8205, "eval_samples_per_second": 15.614, "eval_steps_per_second": 1.303, "step": 850 }, { "dpo_loss": 0.5071312785148621, "epoch": 2.423240434577232, "grad_norm": 7272.234405221214, "learning_rate": 5.322374155568688e-07, "logits": -0.5134973526000977, "logps": -76.6072769165039, "loss": 278.8906, "objective": 276.31671142578125, "ranking_simple": 0.6083333492279053, "regularize": 0.14478901028633118, "step": 855 }, { "dpo_loss": 0.5126808881759644, "epoch": 2.4374114312706663, "grad_norm": 8279.020308548867, "learning_rate": 5.070102711202606e-07, "logits": -0.4904005825519562, "logps": -78.11277770996094, "loss": 272.6675, "objective": 259.9554748535156, "ranking_simple": 0.6041666865348816, "regularize": 0.12409182637929916, "step": 860 }, { "dpo_loss": 0.5031525492668152, "epoch": 2.4515824279641003, "grad_norm": 7810.5501789039035, "learning_rate": 4.823279901756498e-07, "logits": -0.5084951519966125, "logps": -77.30481719970703, "loss": 278.5447, "objective": 284.18084716796875, "ranking_simple": 0.5916666388511658, "regularize": 0.1416517198085785, "step": 865 }, { "dpo_loss": 0.5099405646324158, "epoch": 2.4657534246575343, "grad_norm": 7054.3955623341435, "learning_rate": 4.581973206121948e-07, "logits": -0.522720456123352, "logps": -78.04560852050781, "loss": 278.9006, "objective": 274.6994934082031, "ranking_simple": 0.5958333611488342, "regularize": 0.13992194831371307, "step": 870 }, { "dpo_loss": 0.4974448084831238, "epoch": 2.4799244213509684, "grad_norm": 7158.801947079291, "learning_rate": 4.3462485951401126e-07, "logits": -0.481945663690567, "logps": -77.63534545898438, "loss": 265.5646, "objective": 269.0285949707031, "ranking_simple": 0.612500011920929, "regularize": 0.13618159294128418, "step": 875 }, { "dpo_loss": 0.516840398311615, "epoch": 2.4940954180444024, "grad_norm": 7224.52824602365, "learning_rate": 4.116170513565942e-07, "logits": -0.42012926936149597, "logps": -77.26931762695312, "loss": 277.9572, "objective": 277.60955810546875, "ranking_simple": 0.5708333253860474, "regularize": 0.12541402876377106, "step": 880 }, { "dpo_loss": 0.5121944546699524, "epoch": 2.5082664147378364, "grad_norm": 7670.321121071118, "learning_rate": 3.891801862449629e-07, "logits": -0.5377725958824158, "logps": -76.21176147460938, "loss": 273.7286, "objective": 278.1051940917969, "ranking_simple": 0.612500011920929, "regularize": 0.13135148584842682, "step": 885 }, { "dpo_loss": 0.5038707852363586, "epoch": 2.5224374114312704, "grad_norm": 7464.3594622623605, "learning_rate": 3.6732039819400686e-07, "logits": -0.5120099782943726, "logps": -75.08890533447266, "loss": 270.7643, "objective": 259.2137145996094, "ranking_simple": 0.6000000238418579, "regularize": 0.1371425837278366, "step": 890 }, { "dpo_loss": 0.5104149580001831, "epoch": 2.536608408124705, "grad_norm": 7776.17053954941, "learning_rate": 3.46043663451511e-07, "logits": -0.5063762068748474, "logps": -77.50283813476562, "loss": 274.0527, "objective": 279.8547668457031, "ranking_simple": 0.5708333253860474, "regularize": 0.13040438294410706, "step": 895 }, { "dpo_loss": 0.5273467302322388, "epoch": 2.550779404818139, "grad_norm": 7627.369911355359, "learning_rate": 3.253557988643072e-07, "logits": -0.49025458097457886, "logps": -76.16547393798828, "loss": 271.9766, "objective": 274.15386962890625, "ranking_simple": 0.625, "regularize": 0.13161370158195496, "step": 900 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.7098731994628906, "eval_logits": -0.5707982182502747, "eval_logps": -81.16320037841797, "eval_loss": 577.3417358398438, "eval_objective": 565.7183837890625, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 0.5880821347236633, "eval_runtime": 368.1908, "eval_samples_per_second": 15.726, "eval_steps_per_second": 1.312, "step": 900 }, { "dpo_loss": 0.512609601020813, "epoch": 2.564950401511573, "grad_norm": 6804.885302407535, "learning_rate": 3.052624602880064e-07, "logits": -0.5115708112716675, "logps": -75.81733703613281, "loss": 265.9078, "objective": 269.9800109863281, "ranking_simple": 0.5833333134651184, "regularize": 0.14800624549388885, "step": 905 }, { "dpo_loss": 0.4937320947647095, "epoch": 2.579121398205007, "grad_norm": 7198.203187575293, "learning_rate": 2.8576914104074425e-07, "logits": -0.48827874660491943, "logps": -79.18850708007812, "loss": 265.7443, "objective": 273.89166259765625, "ranking_simple": 0.6041666865348816, "regularize": 0.14111953973770142, "step": 910 }, { "dpo_loss": 0.5084269642829895, "epoch": 2.593292394898441, "grad_norm": 6942.193456170007, "learning_rate": 2.6688117040136463e-07, "logits": -0.5942879915237427, "logps": -78.21766662597656, "loss": 278.3137, "objective": 263.6408386230469, "ranking_simple": 0.637499988079071, "regularize": 0.11606747657060623, "step": 915 }, { "dpo_loss": 0.498431533575058, "epoch": 2.6074633915918755, "grad_norm": 6451.638108152793, "learning_rate": 2.486037121524448e-07, "logits": -0.46755722165107727, "logps": -77.2091293334961, "loss": 270.5728, "objective": 287.58538818359375, "ranking_simple": 0.612500011920929, "regularize": 0.14371080696582794, "step": 920 }, { "dpo_loss": 0.5088561177253723, "epoch": 2.6216343882853095, "grad_norm": 8257.41304629772, "learning_rate": 2.3094176316856982e-07, "logits": -0.4761093258857727, "logps": -76.82450866699219, "loss": 270.0178, "objective": 261.9400634765625, "ranking_simple": 0.5708333253860474, "regularize": 0.11346574872732162, "step": 925 }, { "dpo_loss": 0.5108747482299805, "epoch": 2.6358053849787435, "grad_norm": 7715.040672522825, "learning_rate": 2.13900152050239e-07, "logits": -0.4767756164073944, "logps": -77.13011932373047, "loss": 273.9086, "objective": 285.1964111328125, "ranking_simple": 0.5666666626930237, "regularize": 0.13920140266418457, "step": 930 }, { "dpo_loss": 0.5189302563667297, "epoch": 2.6499763816721775, "grad_norm": 8548.789471220947, "learning_rate": 1.9748353780377234e-07, "logits": -0.45983853936195374, "logps": -76.03173828125, "loss": 276.8373, "objective": 269.6080322265625, "ranking_simple": 0.6583333611488342, "regularize": 0.12301207333803177, "step": 935 }, { "dpo_loss": 0.5017234683036804, "epoch": 2.6641473783656116, "grad_norm": 8162.699837992091, "learning_rate": 1.8169640856758652e-07, "logits": -0.504283607006073, "logps": -78.90401458740234, "loss": 281.2083, "objective": 285.80694580078125, "ranking_simple": 0.5958333611488342, "regularize": 0.1213822215795517, "step": 940 }, { "dpo_loss": 0.4903925657272339, "epoch": 2.678318375059046, "grad_norm": 7208.828785964443, "learning_rate": 1.6654308038518057e-07, "logits": -0.5296005010604858, "logps": -77.27458953857422, "loss": 265.3151, "objective": 269.78106689453125, "ranking_simple": 0.5833333134651184, "regularize": 0.12937407195568085, "step": 945 }, { "dpo_loss": 0.5223451256752014, "epoch": 2.69248937175248, "grad_norm": 7752.613058246486, "learning_rate": 1.5202769602517514e-07, "logits": -0.45917627215385437, "logps": -77.59455871582031, "loss": 273.4982, "objective": 260.7156982421875, "ranking_simple": 0.6333333253860474, "regularize": 0.10433920472860336, "step": 950 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.7103093862533569, "eval_logits": -0.5680380463600159, "eval_logps": -81.19538879394531, "eval_loss": 578.9320678710938, "eval_objective": 567.1773071289062, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 0.5910264849662781, "eval_runtime": 367.6922, "eval_samples_per_second": 15.747, "eval_steps_per_second": 1.314, "step": 950 }, { "dpo_loss": 0.5060604810714722, "epoch": 2.706660368445914, "grad_norm": 6751.002791925838, "learning_rate": 1.381542238487188e-07, "logits": -0.5122019648551941, "logps": -77.12061309814453, "loss": 264.4783, "objective": 250.47158813476562, "ranking_simple": 0.5916666388511658, "regularize": 0.12410003691911697, "step": 955 }, { "dpo_loss": 0.5180147886276245, "epoch": 2.720831365139348, "grad_norm": 8054.959159664104, "learning_rate": 1.2492645672457838e-07, "logits": -0.48304110765457153, "logps": -77.1063003540039, "loss": 279.6324, "objective": 290.03631591796875, "ranking_simple": 0.6041666865348816, "regularize": 0.13714757561683655, "step": 960 }, { "dpo_loss": 0.49898579716682434, "epoch": 2.735002361832782, "grad_norm": 7601.482545856324, "learning_rate": 1.1234801099220787e-07, "logits": -0.5196807980537415, "logps": -77.11388397216797, "loss": 269.8563, "objective": 259.89813232421875, "ranking_simple": 0.6416666507720947, "regularize": 0.13252395391464233, "step": 965 }, { "dpo_loss": 0.51671302318573, "epoch": 2.7491733585262166, "grad_norm": 7233.095787852224, "learning_rate": 1.004223254730749e-07, "logits": -0.5174197554588318, "logps": -78.05948638916016, "loss": 262.7624, "objective": 268.42169189453125, "ranking_simple": 0.6333333253860474, "regularize": 0.13439247012138367, "step": 970 }, { "dpo_loss": 0.4989195764064789, "epoch": 2.7633443552196506, "grad_norm": 7653.138309833151, "learning_rate": 8.915266053052374e-08, "logits": -0.4456302523612976, "logps": -76.5742416381836, "loss": 265.2271, "objective": 260.4195556640625, "ranking_simple": 0.625, "regularize": 0.12436621636152267, "step": 975 }, { "dpo_loss": 0.5052908658981323, "epoch": 2.7775153519130846, "grad_norm": 7354.157364452891, "learning_rate": 7.854209717842231e-08, "logits": -0.5343514084815979, "logps": -76.55020141601562, "loss": 273.2646, "objective": 284.3968200683594, "ranking_simple": 0.6083333492279053, "regularize": 0.1349634826183319, "step": 980 }, { "dpo_loss": 0.5007596015930176, "epoch": 2.7916863486065187, "grad_norm": 6932.45863115929, "learning_rate": 6.859353623884569e-08, "logits": -0.44778621196746826, "logps": -74.72814178466797, "loss": 272.9477, "objective": 261.4037170410156, "ranking_simple": 0.5874999761581421, "regularize": 0.12619872391223907, "step": 985 }, { "dpo_loss": 0.50762540102005, "epoch": 2.8058573452999527, "grad_norm": 7347.273398591806, "learning_rate": 5.930969754901844e-08, "logits": -0.4778214693069458, "logps": -78.38259887695312, "loss": 260.7251, "objective": 262.8490905761719, "ranking_simple": 0.5791666507720947, "regularize": 0.11383456736803055, "step": 990 }, { "dpo_loss": 0.5177646279335022, "epoch": 2.820028341993387, "grad_norm": 8414.372934711379, "learning_rate": 5.069311921774039e-08, "logits": -0.5479720830917358, "logps": -76.77764129638672, "loss": 273.3689, "objective": 282.17889404296875, "ranking_simple": 0.6208333373069763, "regularize": 0.13415595889091492, "step": 995 }, { "dpo_loss": 0.5031678080558777, "epoch": 2.8341993386868207, "grad_norm": 6910.266147708002, "learning_rate": 4.2746156931490756e-08, "logits": -0.4682956337928772, "logps": -75.585205078125, "loss": 265.7935, "objective": 275.0522155761719, "ranking_simple": 0.612500011920929, "regularize": 0.12777787446975708, "step": 1000 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.709902822971344, "eval_logits": -0.5704091191291809, "eval_logps": -81.14698028564453, "eval_loss": 578.3192138671875, "eval_objective": 566.560791015625, "eval_ranking_simple": 0.5367494821548462, "eval_regularize": 0.5901817083358765, "eval_runtime": 368.6553, "eval_samples_per_second": 15.706, "eval_steps_per_second": 1.31, "step": 1000 }, { "dpo_loss": 0.5090419054031372, "epoch": 2.848370335380255, "grad_norm": 7970.005574260741, "learning_rate": 3.547098331040916e-08, "logits": -0.48795023560523987, "logps": -76.1548843383789, "loss": 272.1969, "objective": 268.9000244140625, "ranking_simple": 0.6166666746139526, "regularize": 0.12190086394548416, "step": 1005 }, { "dpo_loss": 0.5107741355895996, "epoch": 2.862541332073689, "grad_norm": 7994.674781206224, "learning_rate": 2.8869587314321324e-08, "logits": -0.5256258845329285, "logps": -76.37800598144531, "loss": 282.5812, "objective": 272.2499084472656, "ranking_simple": 0.612500011920929, "regularize": 0.11501624435186386, "step": 1010 }, { "dpo_loss": 0.5150614380836487, "epoch": 2.8767123287671232, "grad_norm": 7261.730587266494, "learning_rate": 2.2943773698977935e-08, "logits": -0.4966468811035156, "logps": -75.5634994506836, "loss": 260.7579, "objective": 261.33038330078125, "ranking_simple": 0.6458333134651184, "regularize": 0.11271250247955322, "step": 1015 }, { "dpo_loss": 0.508990466594696, "epoch": 2.8908833254605573, "grad_norm": 7841.280059779772, "learning_rate": 1.7695162522652352e-08, "logits": -0.4532057046890259, "logps": -77.1956558227539, "loss": 271.9978, "objective": 272.5274963378906, "ranking_simple": 0.5833333134651184, "regularize": 0.12634117901325226, "step": 1020 }, { "dpo_loss": 0.5181344151496887, "epoch": 2.9050543221539913, "grad_norm": 7685.424658753134, "learning_rate": 1.3125188703233815e-08, "logits": -0.53793865442276, "logps": -77.12080383300781, "loss": 268.2244, "objective": 275.9144287109375, "ranking_simple": 0.625, "regularize": 0.12161087244749069, "step": 1025 }, { "dpo_loss": 0.5186858773231506, "epoch": 2.9192253188474258, "grad_norm": 7664.878619416483, "learning_rate": 9.235101625932885e-09, "logits": -0.535256564617157, "logps": -76.86356353759766, "loss": 277.9306, "objective": 276.3569641113281, "ranking_simple": 0.5541666746139526, "regularize": 0.13014444708824158, "step": 1030 }, { "dpo_loss": 0.5086424350738525, "epoch": 2.9333963155408598, "grad_norm": 7176.213599878172, "learning_rate": 6.025964801714412e-09, "logits": -0.4943471848964691, "logps": -77.83872985839844, "loss": 266.904, "objective": 264.484375, "ranking_simple": 0.6208333373069763, "regularize": 0.11565522104501724, "step": 1035 }, { "dpo_loss": 0.5082514882087708, "epoch": 2.947567312234294, "grad_norm": 7034.435559100273, "learning_rate": 3.4986555765434415e-09, "logits": -0.5228769779205322, "logps": -77.66551208496094, "loss": 271.848, "objective": 274.83355712890625, "ranking_simple": 0.5666666626930237, "regularize": 0.11535345017910004, "step": 1040 }, { "dpo_loss": 0.5133834481239319, "epoch": 2.961738308927728, "grad_norm": 7599.023611459706, "learning_rate": 1.6538648915270794e-09, "logits": -0.47324731945991516, "logps": -79.62892150878906, "loss": 268.8998, "objective": 262.8771667480469, "ranking_simple": 0.6208333373069763, "regularize": 0.12152813374996185, "step": 1045 }, { "dpo_loss": 0.5033511519432068, "epoch": 2.975909305621162, "grad_norm": 8022.09759775128, "learning_rate": 4.920970940180958e-10, "logits": -0.5222968459129333, "logps": -75.3882827758789, "loss": 265.6855, "objective": 262.70672607421875, "ranking_simple": 0.6541666388511658, "regularize": 0.1311034858226776, "step": 1050 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 0.7098360061645508, "eval_logits": -0.5715492367744446, "eval_logps": -81.13370513916016, "eval_loss": 578.1592407226562, "eval_objective": 566.3953857421875, "eval_ranking_simple": 0.5362318754196167, "eval_regularize": 0.5898736715316772, "eval_runtime": 367.7364, "eval_samples_per_second": 15.745, "eval_steps_per_second": 1.313, "step": 1050 }, { "dpo_loss": 0.5144506692886353, "epoch": 2.9900803023145963, "grad_norm": 7870.107583704036, "learning_rate": 1.3669799732163314e-11, "logits": -0.4851941168308258, "logps": -76.2236099243164, "loss": 272.1204, "objective": 281.3744201660156, "ranking_simple": 0.6166666746139526, "regularize": 0.13684484362602234, "step": 1055 }, { "epoch": 2.992914501653283, "step": 1056, "total_flos": 0.0, "train_loss": 356.3672220056707, "train_runtime": 34691.2758, "train_samples_per_second": 4.393, "train_steps_per_second": 0.03 } ], "logging_steps": 5, "max_steps": 1056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }