{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.992914501653283, "eval_steps": 50, "global_step": 1056, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "dpo_loss": 0.6931471824645996, "epoch": 0.002834199338686821, "grad_norm": 17675.58033930074, "learning_rate": 4.716981132075472e-09, "logits": -1.2867579460144043, "logps": -84.34933471679688, "loss": 169.5214, "objective": 153.4677734375, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.3618059456348419, "step": 1, "wo_beta": 14.83154582977295 }, { "dpo_loss": 0.6930959224700928, "epoch": 0.014170996693434105, "grad_norm": 16812.231100839916, "learning_rate": 2.3584905660377358e-08, "logits": -1.4290882349014282, "logps": -83.8636474609375, "loss": 181.7078, "objective": 168.5659942626953, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4895833432674408, "ranking_simple": 0.4895833432674408, "regularize": 0.40367603302001953, "step": 5, "wo_beta": 16.679981231689453 }, { "dpo_loss": 0.6930798292160034, "epoch": 0.02834199338686821, "grad_norm": 18597.19901899509, "learning_rate": 4.7169811320754715e-08, "logits": -1.4008352756500244, "logps": -84.84938049316406, "loss": 177.1073, "objective": 170.35797119140625, "ranking_idealized": 0.6708333492279053, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.4039422273635864, "step": 10, "wo_beta": 15.222180366516113 }, { "dpo_loss": 0.6921038031578064, "epoch": 0.042512990080302314, "grad_norm": 17855.275799007577, "learning_rate": 7.075471698113207e-08, "logits": -1.538023829460144, "logps": -84.5517578125, "loss": 178.9814, "objective": 187.4513702392578, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.5666666626930237, "regularize": 0.4422657787799835, "step": 15, "wo_beta": 15.718367576599121 }, { "dpo_loss": 0.6917796730995178, "epoch": 0.05668398677373642, "grad_norm": 17564.60110673315, "learning_rate": 9.433962264150943e-08, "logits": -1.3617039918899536, "logps": -83.66792297363281, "loss": 185.8199, "objective": 204.0640411376953, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 0.44205835461616516, "step": 20, "wo_beta": 16.52640151977539 }, { "dpo_loss": 0.6927011013031006, "epoch": 0.07085498346717052, "grad_norm": 16991.775752313566, "learning_rate": 1.1792452830188679e-07, "logits": -1.3692513704299927, "logps": -83.765869140625, "loss": 182.1115, "objective": 173.06422424316406, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5666666626930237, "regularize": 0.40760377049446106, "step": 25, "wo_beta": 15.608158111572266 }, { "dpo_loss": 0.6904457211494446, "epoch": 0.08502598016060463, "grad_norm": 14856.204222337537, "learning_rate": 1.4150943396226414e-07, "logits": -1.4308700561523438, "logps": -83.563232421875, "loss": 181.7541, "objective": 176.98880004882812, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.48750001192092896, "regularize": 0.43005380034446716, "step": 30, "wo_beta": 17.01230812072754 }, { "dpo_loss": 0.6906622648239136, "epoch": 0.09919697685403873, "grad_norm": 16081.157641472842, "learning_rate": 1.650943396226415e-07, "logits": -1.4087789058685303, "logps": -82.7640151977539, "loss": 184.1344, "objective": 172.8912811279297, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5416666865348816, "regularize": 0.37934428453445435, "step": 35, "wo_beta": 16.152484893798828 }, { "dpo_loss": 0.6896480917930603, "epoch": 0.11336797354747284, "grad_norm": 17056.964984105944, "learning_rate": 1.8867924528301886e-07, "logits": -1.4006307125091553, "logps": -83.35142517089844, "loss": 188.1977, "objective": 182.53704833984375, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5166666507720947, "regularize": 0.40673011541366577, "step": 40, "wo_beta": 14.254140853881836 }, { "dpo_loss": 0.6860460638999939, "epoch": 0.12753897024090693, "grad_norm": 15541.887109298903, "learning_rate": 2.1226415094339622e-07, "logits": -1.4170690774917603, "logps": -83.82962799072266, "loss": 172.0023, "objective": 184.33473205566406, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.512499988079071, "regularize": 0.41498348116874695, "step": 45, "wo_beta": 14.2799711227417 }, { "dpo_loss": 0.6840464472770691, "epoch": 0.14170996693434104, "grad_norm": 16674.096437377164, "learning_rate": 2.3584905660377358e-07, "logits": -1.4327392578125, "logps": -84.8567123413086, "loss": 182.5182, "objective": 187.45941162109375, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5041666626930237, "regularize": 0.43751442432403564, "step": 50, "wo_beta": 15.616755485534668 }, { "epoch": 0.14170996693434104, "eval_dpo_loss": 0.689544677734375, "eval_logits": -1.4199916124343872, "eval_logps": -90.85165405273438, "eval_loss": 182.50025939941406, "eval_objective": 180.4892578125, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5248447060585022, "eval_regularize": 0.4092595875263214, "eval_runtime": 369.2017, "eval_samples_per_second": 15.682, "eval_steps_per_second": 1.308, "eval_wo_beta": 16.310007095336914, "step": 50 }, { "dpo_loss": 0.6828119158744812, "epoch": 0.15588096362777515, "grad_norm": 17241.419986006367, "learning_rate": 2.5943396226415094e-07, "logits": -1.3938590288162231, "logps": -84.56362915039062, "loss": 171.7727, "objective": 174.47201538085938, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.4833333194255829, "regularize": 0.3751158118247986, "step": 55, "wo_beta": 15.145721435546875 }, { "dpo_loss": 0.6828226447105408, "epoch": 0.17005196032120926, "grad_norm": 16193.112384702756, "learning_rate": 2.830188679245283e-07, "logits": -1.325377345085144, "logps": -85.11466217041016, "loss": 175.0018, "objective": 174.82723999023438, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5208333134651184, "regularize": 0.41120079159736633, "step": 60, "wo_beta": 14.497312545776367 }, { "dpo_loss": 0.673675537109375, "epoch": 0.18422295701464336, "grad_norm": 16474.473591772632, "learning_rate": 3.066037735849056e-07, "logits": -1.4237332344055176, "logps": -82.10260772705078, "loss": 174.1747, "objective": 163.51272583007812, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5333333611488342, "regularize": 0.3746616542339325, "step": 65, "wo_beta": 15.076056480407715 }, { "dpo_loss": 0.6760156750679016, "epoch": 0.19839395370807747, "grad_norm": 15655.300421670066, "learning_rate": 3.30188679245283e-07, "logits": -1.4630695581436157, "logps": -84.45524597167969, "loss": 175.2515, "objective": 174.8110809326172, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5333333611488342, "regularize": 0.40165895223617554, "step": 70, "wo_beta": 15.362497329711914 }, { "dpo_loss": 0.6764008402824402, "epoch": 0.21256495040151158, "grad_norm": 18669.073461411434, "learning_rate": 3.5377358490566033e-07, "logits": -1.3853403329849243, "logps": -84.13139343261719, "loss": 176.1206, "objective": 166.9723663330078, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5208333134651184, "regularize": 0.38751351833343506, "step": 75, "wo_beta": 15.094878196716309 }, { "dpo_loss": 0.6740989089012146, "epoch": 0.22673594709494568, "grad_norm": 19007.32032313182, "learning_rate": 3.773584905660377e-07, "logits": -1.481835126876831, "logps": -83.50402069091797, "loss": 170.5001, "objective": 178.72813415527344, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5458333492279053, "regularize": 0.42763322591781616, "step": 80, "wo_beta": 16.335308074951172 }, { "dpo_loss": 0.6696261167526245, "epoch": 0.2409069437883798, "grad_norm": 15826.33154406391, "learning_rate": 4.009433962264151e-07, "logits": -1.4629038572311401, "logps": -83.72789001464844, "loss": 173.4106, "objective": 184.8107452392578, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.550000011920929, "regularize": 0.41807958483695984, "step": 85, "wo_beta": 15.131178855895996 }, { "dpo_loss": 0.6652686595916748, "epoch": 0.25507794048181387, "grad_norm": 16251.074571263394, "learning_rate": 4.2452830188679244e-07, "logits": -1.562590479850769, "logps": -84.50687408447266, "loss": 170.0469, "objective": 178.0680694580078, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5541666746139526, "regularize": 0.40833932161331177, "step": 90, "wo_beta": 15.540740966796875 }, { "dpo_loss": 0.6606998443603516, "epoch": 0.269248937175248, "grad_norm": 17104.36417405731, "learning_rate": 4.481132075471698e-07, "logits": -1.4817092418670654, "logps": -84.26000213623047, "loss": 172.5874, "objective": 179.69664001464844, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.512499988079071, "regularize": 0.412168025970459, "step": 95, "wo_beta": 16.397871017456055 }, { "dpo_loss": 0.6613048315048218, "epoch": 0.2834199338686821, "grad_norm": 16520.717626446203, "learning_rate": 4.7169811320754717e-07, "logits": -1.3626132011413574, "logps": -83.24072265625, "loss": 159.305, "objective": 164.95997619628906, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5, "regularize": 0.37101998925209045, "step": 100, "wo_beta": 15.463290214538574 }, { "epoch": 0.2834199338686821, "eval_dpo_loss": 0.6859607100486755, "eval_logits": -1.4621983766555786, "eval_logps": -91.35309600830078, "eval_loss": 182.15220642089844, "eval_objective": 180.52191162109375, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.41025590896606445, "eval_runtime": 368.4707, "eval_samples_per_second": 15.714, "eval_steps_per_second": 1.311, "eval_wo_beta": 16.38188362121582, "step": 100 }, { "dpo_loss": 0.6732772588729858, "epoch": 0.2975909305621162, "grad_norm": 20087.528218167263, "learning_rate": 4.952830188679246e-07, "logits": -1.5680618286132812, "logps": -86.15119934082031, "loss": 163.5086, "objective": 165.89260864257812, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5375000238418579, "regularize": 0.38661062717437744, "step": 105, "wo_beta": 14.799639701843262 }, { "dpo_loss": 0.6610164046287537, "epoch": 0.3117619272555503, "grad_norm": 16022.782640878671, "learning_rate": 4.999781286194085e-07, "logits": -1.470965027809143, "logps": -85.03189849853516, "loss": 162.0049, "objective": 163.46115112304688, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5166666507720947, "regularize": 0.38891494274139404, "step": 110, "wo_beta": 15.151239395141602 }, { "dpo_loss": 0.6605216860771179, "epoch": 0.32593292394898443, "grad_norm": 17332.290004559494, "learning_rate": 4.998892826944417e-07, "logits": -1.4446924924850464, "logps": -83.6183090209961, "loss": 151.5536, "objective": 154.1279296875, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5416666865348816, "regularize": 0.37217938899993896, "step": 115, "wo_beta": 16.30828094482422 }, { "dpo_loss": 0.6518290042877197, "epoch": 0.3401039206424185, "grad_norm": 15444.050796566442, "learning_rate": 4.997321195347154e-07, "logits": -1.4417078495025635, "logps": -83.37753295898438, "loss": 159.0179, "objective": 166.70437622070312, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5416666865348816, "regularize": 0.36605674028396606, "step": 120, "wo_beta": 15.73963451385498 }, { "dpo_loss": 0.655017614364624, "epoch": 0.35427491733585265, "grad_norm": 16163.2780124362, "learning_rate": 4.995066821070679e-07, "logits": -1.479369044303894, "logps": -86.81266021728516, "loss": 152.8657, "objective": 143.82176208496094, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5083333253860474, "regularize": 0.3394821584224701, "step": 125, "wo_beta": 16.155017852783203 }, { "dpo_loss": 0.6490565538406372, "epoch": 0.3684459140292867, "grad_norm": 21066.462923637613, "learning_rate": 4.99213032043841e-07, "logits": -1.4559980630874634, "logps": -84.79336547851562, "loss": 159.2873, "objective": 164.24014282226562, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5916666388511658, "ranking_simple": 0.5791666507720947, "regularize": 0.36314311623573303, "step": 130, "wo_beta": 16.631132125854492 }, { "dpo_loss": 0.6596164703369141, "epoch": 0.3826169107227208, "grad_norm": 25406.331880366477, "learning_rate": 4.988512496260301e-07, "logits": -1.4966251850128174, "logps": -85.71400451660156, "loss": 162.2932, "objective": 171.63385009765625, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5, "regularize": 0.3589702546596527, "step": 135, "wo_beta": 14.497623443603516 }, { "dpo_loss": 0.6495281457901001, "epoch": 0.39678790741615494, "grad_norm": 18698.7190702899, "learning_rate": 4.984214337613357e-07, "logits": -1.4679287672042847, "logps": -85.32872772216797, "loss": 145.3416, "objective": 162.12405395507812, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5583333373069763, "regularize": 0.37301716208457947, "step": 140, "wo_beta": 14.43948745727539 }, { "dpo_loss": 0.6436702013015747, "epoch": 0.410958904109589, "grad_norm": 17254.43790008681, "learning_rate": 4.979237019571234e-07, "logits": -1.4821332693099976, "logps": -85.37076568603516, "loss": 150.1815, "objective": 147.49313354492188, "ranking_idealized": 0.6666666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5958333611488342, "regularize": 0.35633403062820435, "step": 145, "wo_beta": 14.047316551208496 }, { "dpo_loss": 0.6469600796699524, "epoch": 0.42512990080302315, "grad_norm": 15151.951735697437, "learning_rate": 4.973581902882989e-07, "logits": -1.507290005683899, "logps": -84.8727035522461, "loss": 150.2379, "objective": 155.10818481445312, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5625, "regularize": 0.35707762837409973, "step": 150, "wo_beta": 17.33567237854004 }, { "epoch": 0.42512990080302315, "eval_dpo_loss": 0.6805834770202637, "eval_logits": -1.4576332569122314, "eval_logps": -90.24694061279297, "eval_loss": 180.0574951171875, "eval_objective": 177.15777587890625, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5331262946128845, "eval_regularize": 0.40097880363464355, "eval_runtime": 369.0413, "eval_samples_per_second": 15.689, "eval_steps_per_second": 1.309, "eval_wo_beta": 16.610729217529297, "step": 150 }, { "dpo_loss": 0.645554780960083, "epoch": 0.43930089749645723, "grad_norm": 16201.615637764453, "learning_rate": 4.967250533601059e-07, "logits": -1.5539363622665405, "logps": -83.4765396118164, "loss": 146.8879, "objective": 153.79454040527344, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.49166667461395264, "regularize": 0.3449983596801758, "step": 155, "wo_beta": 15.733673095703125 }, { "dpo_loss": 0.6385053396224976, "epoch": 0.45347189418989137, "grad_norm": 15493.137210302475, "learning_rate": 4.960244642658585e-07, "logits": -1.4331082105636597, "logps": -84.22053527832031, "loss": 152.5499, "objective": 151.0172882080078, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5625, "regularize": 0.34484514594078064, "step": 160, "wo_beta": 14.795166015625 }, { "dpo_loss": 0.6392009854316711, "epoch": 0.46764289088332545, "grad_norm": 17120.620589579255, "learning_rate": 4.952566145396196e-07, "logits": -1.5298134088516235, "logps": -85.76438903808594, "loss": 144.1923, "objective": 138.38796997070312, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.550000011920929, "regularize": 0.32356587052345276, "step": 165, "wo_beta": 15.664267539978027 }, { "dpo_loss": 0.6427010893821716, "epoch": 0.4818138875767596, "grad_norm": 16261.789384415662, "learning_rate": 4.944217141038378e-07, "logits": -1.5661406517028809, "logps": -85.18688201904297, "loss": 146.9829, "objective": 135.14251708984375, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5166666507720947, "regularize": 0.32022854685783386, "step": 170, "wo_beta": 16.11832046508789 }, { "dpo_loss": 0.6393853425979614, "epoch": 0.49598488427019366, "grad_norm": 17356.348275057364, "learning_rate": 4.935199912119557e-07, "logits": -1.4016886949539185, "logps": -86.42796325683594, "loss": 138.7378, "objective": 130.81809997558594, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.550000011920929, "regularize": 0.3065473735332489, "step": 175, "wo_beta": 18.01983070373535 }, { "dpo_loss": 0.6324561238288879, "epoch": 0.5101558809636277, "grad_norm": 16878.815105316982, "learning_rate": 4.925516923860082e-07, "logits": -1.387779951095581, "logps": -85.71736907958984, "loss": 143.5962, "objective": 155.15878295898438, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5083333253860474, "regularize": 0.34209319949150085, "step": 180, "wo_beta": 15.11577033996582 }, { "dpo_loss": 0.6253044605255127, "epoch": 0.5243268776570619, "grad_norm": 18146.789309631047, "learning_rate": 4.91517082349226e-07, "logits": -1.4047019481658936, "logps": -84.99198913574219, "loss": 137.7836, "objective": 136.2899627685547, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5291666388511658, "regularize": 0.32172784209251404, "step": 185, "wo_beta": 14.294845581054688 }, { "dpo_loss": 0.618080735206604, "epoch": 0.538497874350496, "grad_norm": 16414.68768978514, "learning_rate": 4.904164439536626e-07, "logits": -1.438673496246338, "logps": -84.07417297363281, "loss": 134.8449, "objective": 134.37405395507812, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.6083333492279053, "regularize": 0.30226340889930725, "step": 190, "wo_beta": 17.166841506958008 }, { "dpo_loss": 0.6151688694953918, "epoch": 0.5526688710439301, "grad_norm": 17479.85306665603, "learning_rate": 4.892500781028655e-07, "logits": -1.4530678987503052, "logps": -84.28058624267578, "loss": 143.4298, "objective": 148.5701141357422, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5666666626930237, "regularize": 0.32925572991371155, "step": 195, "wo_beta": 15.276497840881348 }, { "dpo_loss": 0.6147686243057251, "epoch": 0.5668398677373642, "grad_norm": 17426.518013700184, "learning_rate": 4.880183036696122e-07, "logits": -1.461132287979126, "logps": -84.65553283691406, "loss": 135.925, "objective": 143.74124145507812, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5291666388511658, "regularize": 0.3410433530807495, "step": 200, "wo_beta": 16.30559539794922 }, { "epoch": 0.5668398677373642, "eval_dpo_loss": 0.6795096397399902, "eval_logits": -1.445342779159546, "eval_logps": -91.12489318847656, "eval_loss": 179.97398376464844, "eval_objective": 177.04129028320312, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5305383205413818, "eval_regularize": 0.40064504742622375, "eval_runtime": 371.8339, "eval_samples_per_second": 15.571, "eval_steps_per_second": 1.299, "eval_wo_beta": 16.26874351501465, "step": 200 }, { "dpo_loss": 0.6258116960525513, "epoch": 0.5810108644307983, "grad_norm": 16576.414881735825, "learning_rate": 4.867214574087337e-07, "logits": -1.3335182666778564, "logps": -85.45887756347656, "loss": 131.7019, "objective": 124.74952697753906, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5208333134651184, "regularize": 0.30992764234542847, "step": 205, "wo_beta": 16.508281707763672 }, { "dpo_loss": 0.6121302247047424, "epoch": 0.5951818611242324, "grad_norm": 19803.851938298896, "learning_rate": 4.853598938650486e-07, "logits": -1.3813374042510986, "logps": -85.58969116210938, "loss": 142.8564, "objective": 147.2575225830078, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5458333492279053, "regularize": 0.34093615412712097, "step": 210, "wo_beta": 15.581511497497559 }, { "dpo_loss": 0.6104889512062073, "epoch": 0.6093528578176665, "grad_norm": 16635.20929245541, "learning_rate": 4.839339852764349e-07, "logits": -1.4747200012207031, "logps": -84.081298828125, "loss": 132.2213, "objective": 143.8260040283203, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5249999761581421, "regularize": 0.3345707654953003, "step": 215, "wo_beta": 16.507150650024414 }, { "dpo_loss": 0.6056095957756042, "epoch": 0.6235238545111006, "grad_norm": 17892.305863583017, "learning_rate": 4.824441214720628e-07, "logits": -1.4685624837875366, "logps": -84.3477783203125, "loss": 136.6061, "objective": 134.5189971923828, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.512499988079071, "regularize": 0.3130161464214325, "step": 220, "wo_beta": 16.86256217956543 }, { "dpo_loss": 0.6179187297821045, "epoch": 0.6376948512045347, "grad_norm": 15080.021415971216, "learning_rate": 4.808907097658205e-07, "logits": -1.5259219408035278, "logps": -85.83920288085938, "loss": 132.0525, "objective": 135.1264190673828, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.512499988079071, "regularize": 0.310780793428421, "step": 225, "wo_beta": 15.415994644165039 }, { "dpo_loss": 0.613567590713501, "epoch": 0.6518658478979689, "grad_norm": 15927.895183583649, "learning_rate": 4.792741748449574e-07, "logits": -1.4311482906341553, "logps": -85.6706314086914, "loss": 121.739, "objective": 121.26396179199219, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.5916666388511658, "regularize": 0.29853278398513794, "step": 230, "wo_beta": 16.95390510559082 }, { "dpo_loss": 0.6123810410499573, "epoch": 0.6660368445914029, "grad_norm": 18377.873397463725, "learning_rate": 4.775949586539803e-07, "logits": -1.3708454370498657, "logps": -86.44990539550781, "loss": 121.6467, "objective": 109.16976165771484, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5583333373069763, "regularize": 0.2606847584247589, "step": 235, "wo_beta": 16.56985092163086 }, { "dpo_loss": 0.6006901264190674, "epoch": 0.680207841284837, "grad_norm": 20176.56902743112, "learning_rate": 4.758535202738287e-07, "logits": -1.5398815870285034, "logps": -86.04439544677734, "loss": 135.6713, "objective": 137.5254364013672, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5416666865348816, "regularize": 0.29497501254081726, "step": 240, "wo_beta": 16.538175582885742 }, { "dpo_loss": 0.6231993436813354, "epoch": 0.6943788379782712, "grad_norm": 16269.619431330953, "learning_rate": 4.7405033579636755e-07, "logits": -1.5562618970870972, "logps": -86.00993347167969, "loss": 122.2342, "objective": 110.85368347167969, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.574999988079071, "regularize": 0.2635067105293274, "step": 245, "wo_beta": 16.003578186035156 }, { "dpo_loss": 0.608769953250885, "epoch": 0.7085498346717053, "grad_norm": 16883.673846830992, "learning_rate": 4.721858981942284e-07, "logits": -1.3197777271270752, "logps": -84.4737319946289, "loss": 130.7065, "objective": 131.6678924560547, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5708333253860474, "regularize": 0.29679057002067566, "step": 250, "wo_beta": 15.271538734436035 }, { "epoch": 0.7085498346717053, "eval_dpo_loss": 0.680046558380127, "eval_logits": -1.5061429738998413, "eval_logps": -91.61782836914062, "eval_loss": 181.50924682617188, "eval_objective": 178.27838134765625, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5305383205413818, "eval_regularize": 0.404918372631073, "eval_runtime": 368.2121, "eval_samples_per_second": 15.725, "eval_steps_per_second": 1.312, "eval_wo_beta": 16.640703201293945, "step": 250 }, { "dpo_loss": 0.6077960729598999, "epoch": 0.7227208313651393, "grad_norm": 16400.742832285967, "learning_rate": 4.702607171860353e-07, "logits": -1.582943320274353, "logps": -83.8155746459961, "loss": 128.7728, "objective": 116.49504852294922, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.42916667461395264, "ranking_simple": 0.4833333194255829, "regularize": 0.28917694091796875, "step": 255, "wo_beta": 15.234207153320312 }, { "dpo_loss": 0.6078327298164368, "epoch": 0.7368918280585735, "grad_norm": 19074.785460432806, "learning_rate": 4.6827531909705327e-07, "logits": -1.5513815879821777, "logps": -85.73532104492188, "loss": 120.8491, "objective": 135.26841735839844, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.512499988079071, "regularize": 0.3036416172981262, "step": 260, "wo_beta": 15.789148330688477 }, { "dpo_loss": 0.6069940328598022, "epoch": 0.7510628247520076, "grad_norm": 14839.46399025929, "learning_rate": 4.662302467152955e-07, "logits": -1.4743403196334839, "logps": -85.0496597290039, "loss": 115.1093, "objective": 120.47551727294922, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6208333373069763, "regularize": 0.29132476449012756, "step": 265, "wo_beta": 16.472087860107422 }, { "dpo_loss": 0.6180242300033569, "epoch": 0.7652338214454416, "grad_norm": 17448.69545980003, "learning_rate": 4.6412605914313143e-07, "logits": -1.5716725587844849, "logps": -85.8649673461914, "loss": 116.6119, "objective": 116.45634460449219, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4583333432674408, "ranking_simple": 0.4791666567325592, "regularize": 0.25854194164276123, "step": 270, "wo_beta": 14.43771743774414 }, { "dpo_loss": 0.6129618287086487, "epoch": 0.7794048181388757, "grad_norm": 16560.36826913483, "learning_rate": 4.619633316444329e-07, "logits": -1.4034606218338013, "logps": -84.92838287353516, "loss": 111.1039, "objective": 110.11347961425781, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5208333134651184, "regularize": 0.2595362663269043, "step": 275, "wo_beta": 15.505229949951172 }, { "dpo_loss": 0.6037231087684631, "epoch": 0.7935758148323099, "grad_norm": 18592.99967397487, "learning_rate": 4.597426554873036e-07, "logits": -1.5048367977142334, "logps": -85.48095703125, "loss": 124.5191, "objective": 130.4561767578125, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5583333373069763, "regularize": 0.27942749857902527, "step": 280, "wo_beta": 15.601365089416504 }, { "dpo_loss": 0.6074939370155334, "epoch": 0.807746811525744, "grad_norm": 16296.64397951243, "learning_rate": 4.574646377824315e-07, "logits": -1.4947975873947144, "logps": -87.23796844482422, "loss": 116.6867, "objective": 114.05181121826172, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5083333253860474, "regularize": 0.2666790187358856, "step": 285, "wo_beta": 17.003517150878906 }, { "dpo_loss": 0.5908948183059692, "epoch": 0.821917808219178, "grad_norm": 16844.100126093494, "learning_rate": 4.551299013171111e-07, "logits": -1.500679850578308, "logps": -86.47892761230469, "loss": 113.1195, "objective": 110.57820892333984, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5333333611488342, "regularize": 0.25748953223228455, "step": 290, "wo_beta": 16.26107406616211 }, { "dpo_loss": 0.602837860584259, "epoch": 0.8360888049126122, "grad_norm": 15918.83667114978, "learning_rate": 4.5273908438498e-07, "logits": -1.488081693649292, "logps": -86.3355484008789, "loss": 112.7178, "objective": 110.10317993164062, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5041666626930237, "regularize": 0.2576940059661865, "step": 295, "wo_beta": 16.481365203857422 }, { "dpo_loss": 0.6130139231681824, "epoch": 0.8502598016060463, "grad_norm": 16024.42965961574, "learning_rate": 4.502928406115152e-07, "logits": -1.4704513549804688, "logps": -84.77509307861328, "loss": 109.74, "objective": 110.3909683227539, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.5791666507720947, "regularize": 0.25806304812431335, "step": 300, "wo_beta": 14.735386848449707 }, { "epoch": 0.8502598016060463, "eval_dpo_loss": 0.6815229654312134, "eval_logits": -1.476037859916687, "eval_logps": -92.42357635498047, "eval_loss": 180.4923553466797, "eval_objective": 178.13650512695312, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5305383205413818, "eval_regularize": 0.40466198325157166, "eval_runtime": 369.1363, "eval_samples_per_second": 15.685, "eval_steps_per_second": 1.308, "eval_wo_beta": 16.49808120727539, "step": 300 }, { "dpo_loss": 0.6006699204444885, "epoch": 0.8644307982994804, "grad_norm": 16503.480891595664, "learning_rate": 4.4779183877533877e-07, "logits": -1.4632763862609863, "logps": -85.11184692382812, "loss": 110.0493, "objective": 114.68462371826172, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.25510621070861816, "step": 305, "wo_beta": 15.895527839660645 }, { "dpo_loss": 0.6042004227638245, "epoch": 0.8786017949929145, "grad_norm": 15793.814439937723, "learning_rate": 4.4523676262538045e-07, "logits": -1.47891104221344, "logps": -84.36697387695312, "loss": 112.7694, "objective": 122.6783676147461, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5291666388511658, "regularize": 0.2812657058238983, "step": 310, "wo_beta": 15.347722053527832 }, { "dpo_loss": 0.591303825378418, "epoch": 0.8927727916863486, "grad_norm": 17495.093435377945, "learning_rate": 4.426283106939473e-07, "logits": -1.4973819255828857, "logps": -84.14984893798828, "loss": 105.8584, "objective": 102.11656188964844, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.512499988079071, "regularize": 0.23703667521476746, "step": 315, "wo_beta": 15.412622451782227 }, { "dpo_loss": 0.5901548862457275, "epoch": 0.9069437883797827, "grad_norm": 16776.49496984533, "learning_rate": 4.3996719610575215e-07, "logits": -1.4549332857131958, "logps": -86.48863983154297, "loss": 101.9659, "objective": 103.75129699707031, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6000000238418579, "regularize": 0.2400914579629898, "step": 320, "wo_beta": 15.67490005493164 }, { "dpo_loss": 0.5979679226875305, "epoch": 0.9211147850732169, "grad_norm": 16212.553406478031, "learning_rate": 4.372541463829523e-07, "logits": -1.543658971786499, "logps": -87.07477569580078, "loss": 110.2578, "objective": 94.58499908447266, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5583333373069763, "regularize": 0.23497775197029114, "step": 325, "wo_beta": 15.544549942016602 }, { "dpo_loss": 0.5859458446502686, "epoch": 0.9352857817666509, "grad_norm": 17177.10951815794, "learning_rate": 4.344899032462524e-07, "logits": -1.3802608251571655, "logps": -86.10081481933594, "loss": 113.3699, "objective": 119.8874740600586, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.574999988079071, "regularize": 0.2620205283164978, "step": 330, "wo_beta": 16.999446868896484 }, { "dpo_loss": 0.5934227108955383, "epoch": 0.949456778460085, "grad_norm": 17661.288172623317, "learning_rate": 4.316752224121252e-07, "logits": -1.4096896648406982, "logps": -85.97354125976562, "loss": 112.801, "objective": 102.87708282470703, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5708333253860474, "regularize": 0.226911723613739, "step": 335, "wo_beta": 15.912822723388672 }, { "dpo_loss": 0.5984110236167908, "epoch": 0.9636277751535192, "grad_norm": 16330.575056246405, "learning_rate": 4.2881087338620634e-07, "logits": -1.4624823331832886, "logps": -85.42594909667969, "loss": 112.5159, "objective": 105.5082015991211, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.5916666388511658, "regularize": 0.24777108430862427, "step": 340, "wo_beta": 16.43703269958496 }, { "dpo_loss": 0.5881261825561523, "epoch": 0.9777987718469532, "grad_norm": 16460.598983396474, "learning_rate": 4.258976392529192e-07, "logits": -1.5221667289733887, "logps": -84.57250213623047, "loss": 105.3708, "objective": 96.2402114868164, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5833333134651184, "ranking_simple": 0.625, "regularize": 0.22673317790031433, "step": 345, "wo_beta": 15.497628211975098 }, { "dpo_loss": 0.582562267780304, "epoch": 0.9919697685403873, "grad_norm": 15979.907994389112, "learning_rate": 4.2293631646138735e-07, "logits": -1.4198105335235596, "logps": -87.27174377441406, "loss": 104.2663, "objective": 106.56012725830078, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6208333373069763, "regularize": 0.2592408061027527, "step": 350, "wo_beta": 15.726160049438477 }, { "epoch": 0.9919697685403873, "eval_dpo_loss": 0.6808353066444397, "eval_logits": -1.5066107511520386, "eval_logps": -92.80049133300781, "eval_loss": 182.25906372070312, "eval_objective": 178.8644256591797, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5289855003356934, "eval_regularize": 0.4057510197162628, "eval_runtime": 368.5888, "eval_samples_per_second": 15.709, "eval_steps_per_second": 1.31, "eval_wo_beta": 16.569448471069336, "step": 350 }, { "dpo_loss": 0.5861319899559021, "epoch": 1.0061407652338215, "grad_norm": 16794.52051588047, "learning_rate": 4.1992771460769325e-07, "logits": -1.6238858699798584, "logps": -84.81636810302734, "loss": 101.4044, "objective": 99.22207641601562, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.574999988079071, "regularize": 0.2406620979309082, "step": 355, "wo_beta": 16.778457641601562 }, { "dpo_loss": 0.5623802542686462, "epoch": 1.0203117619272555, "grad_norm": 17512.309592114732, "learning_rate": 4.168726562135431e-07, "logits": -1.4817250967025757, "logps": -85.78034973144531, "loss": 89.2678, "objective": 87.2276840209961, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5708333253860474, "regularize": 0.20162057876586914, "step": 360, "wo_beta": 16.540082931518555 }, { "dpo_loss": 0.5711230635643005, "epoch": 1.0344827586206897, "grad_norm": 17722.39127835427, "learning_rate": 4.1377197650139734e-07, "logits": -1.5016947984695435, "logps": -86.203369140625, "loss": 94.5137, "objective": 95.95963287353516, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5625, "regularize": 0.21429090201854706, "step": 365, "wo_beta": 15.065950393676758 }, { "dpo_loss": 0.5723836421966553, "epoch": 1.0486537553141237, "grad_norm": 16746.74468565218, "learning_rate": 4.106265231661291e-07, "logits": -1.4276858568191528, "logps": -84.18301391601562, "loss": 87.9015, "objective": 87.47245788574219, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5666666626930237, "regularize": 0.20792974531650543, "step": 370, "wo_beta": 15.806612968444824 }, { "dpo_loss": 0.575459897518158, "epoch": 1.0628247520075578, "grad_norm": 16711.49471758267, "learning_rate": 4.0743715614327314e-07, "logits": -1.4709128141403198, "logps": -84.51998901367188, "loss": 81.3317, "objective": 80.16383361816406, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5666666626930237, "regularize": 0.19415231049060822, "step": 375, "wo_beta": 16.286664962768555 }, { "dpo_loss": 0.5680096745491028, "epoch": 1.076995748700992, "grad_norm": 15938.178621987885, "learning_rate": 4.042047473739277e-07, "logits": -1.4488080739974976, "logps": -86.38304138183594, "loss": 88.4379, "objective": 90.77100372314453, "ranking_idealized": 0.6625000238418579, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6333333253860474, "regularize": 0.21614128351211548, "step": 380, "wo_beta": 15.816045761108398 }, { "dpo_loss": 0.5828992128372192, "epoch": 1.091166745394426, "grad_norm": 15573.908569824505, "learning_rate": 4.009301805663752e-07, "logits": -1.4298585653305054, "logps": -85.34860229492188, "loss": 90.2727, "objective": 100.38928985595703, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5791666507720947, "regularize": 0.21903865039348602, "step": 385, "wo_beta": 15.969101905822754 }, { "dpo_loss": 0.5651105046272278, "epoch": 1.10533774208786, "grad_norm": 15903.36624651428, "learning_rate": 3.9761435095448424e-07, "logits": -1.386973261833191, "logps": -86.8327865600586, "loss": 85.9698, "objective": 81.47636413574219, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.612500011920929, "regularize": 0.1953142136335373, "step": 390, "wo_beta": 16.701154708862305 }, { "dpo_loss": 0.5567125082015991, "epoch": 1.1195087387812943, "grad_norm": 17073.21610205935, "learning_rate": 3.942581650529625e-07, "logits": -1.4661533832550049, "logps": -86.22716522216797, "loss": 78.9786, "objective": 86.99171447753906, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5458333492279053, "regularize": 0.19182702898979187, "step": 395, "wo_beta": 15.073732376098633 }, { "dpo_loss": 0.5746569037437439, "epoch": 1.1336797354747283, "grad_norm": 17100.977239850836, "learning_rate": 3.908625404095242e-07, "logits": -1.542074203491211, "logps": -84.60574340820312, "loss": 91.3585, "objective": 91.53578186035156, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5833333134651184, "regularize": 0.2116149365901947, "step": 400, "wo_beta": 15.816818237304688 }, { "epoch": 1.1336797354747283, "eval_dpo_loss": 0.6799505352973938, "eval_logits": -1.478875756263733, "eval_logps": -92.38536071777344, "eval_loss": 180.029541015625, "eval_objective": 177.71481323242188, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 0.402423620223999, "eval_runtime": 374.7281, "eval_samples_per_second": 15.451, "eval_steps_per_second": 1.289, "eval_wo_beta": 16.585174560546875, "step": 400 }, { "dpo_loss": 0.5667446255683899, "epoch": 1.1478507321681626, "grad_norm": 17235.250314074532, "learning_rate": 3.874284053540415e-07, "logits": -1.506400227546692, "logps": -86.09246063232422, "loss": 87.7881, "objective": 99.79354095458984, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5375000238418579, "regularize": 0.21635954082012177, "step": 405, "wo_beta": 17.256864547729492 }, { "dpo_loss": 0.5707606673240662, "epoch": 1.1620217288615966, "grad_norm": 16725.889726383095, "learning_rate": 3.839566987447491e-07, "logits": -1.5043673515319824, "logps": -85.6620864868164, "loss": 88.1003, "objective": 87.76438903808594, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.19931498169898987, "step": 410, "wo_beta": 16.657556533813477 }, { "dpo_loss": 0.5724092721939087, "epoch": 1.1761927255550306, "grad_norm": 14812.855448630145, "learning_rate": 3.804483697115693e-07, "logits": -1.420817255973816, "logps": -85.20552062988281, "loss": 80.2264, "objective": 89.5212173461914, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5874999761581421, "regularize": 0.21374522149562836, "step": 415, "wo_beta": 16.42493438720703 }, { "dpo_loss": 0.5684979557991028, "epoch": 1.1903637222484649, "grad_norm": 16758.534351302478, "learning_rate": 3.769043773966292e-07, "logits": -1.3999756574630737, "logps": -86.63607788085938, "loss": 86.6386, "objective": 74.90924835205078, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5708333253860474, "regularize": 0.17667636275291443, "step": 420, "wo_beta": 16.390174865722656 }, { "dpo_loss": 0.5639453530311584, "epoch": 1.204534718941899, "grad_norm": 16337.276317485204, "learning_rate": 3.733256906920412e-07, "logits": -1.4687834978103638, "logps": -85.14289093017578, "loss": 88.4628, "objective": 91.70726013183594, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.574999988079071, "regularize": 0.21049639582633972, "step": 425, "wo_beta": 15.31130599975586 }, { "dpo_loss": 0.5734551548957825, "epoch": 1.2187057156353331, "grad_norm": 15248.100441462713, "learning_rate": 3.6971328797501735e-07, "logits": -1.448046088218689, "logps": -86.40514373779297, "loss": 77.2816, "objective": 79.60990142822266, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5458333492279053, "regularize": 0.17995795607566833, "step": 430, "wo_beta": 16.629831314086914 }, { "dpo_loss": 0.5668199062347412, "epoch": 1.2328767123287672, "grad_norm": 16306.828228688702, "learning_rate": 3.660681568403909e-07, "logits": -1.4072421789169312, "logps": -85.66299438476562, "loss": 87.0271, "objective": 85.58671569824219, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5958333611488342, "ranking_simple": 0.6208333373069763, "regularize": 0.19756914675235748, "step": 435, "wo_beta": 17.663021087646484 }, { "dpo_loss": 0.5658931732177734, "epoch": 1.2470477090222012, "grad_norm": 20345.22767033362, "learning_rate": 3.623912938306176e-07, "logits": -1.4035090208053589, "logps": -85.33922576904297, "loss": 79.1586, "objective": 74.25531768798828, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.5791666507720947, "regularize": 0.17811766266822815, "step": 440, "wo_beta": 15.194890975952148 }, { "dpo_loss": 0.5541914105415344, "epoch": 1.2612187057156352, "grad_norm": 17279.48326071237, "learning_rate": 3.5868370416333116e-07, "logits": -1.4238730669021606, "logps": -87.22127532958984, "loss": 85.5554, "objective": 88.88020324707031, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6166666746139526, "regularize": 0.21330490708351135, "step": 445, "wo_beta": 17.520837783813477 }, { "dpo_loss": 0.5635745525360107, "epoch": 1.2753897024090695, "grad_norm": 15953.8185394536, "learning_rate": 3.549464014565264e-07, "logits": -1.5866882801055908, "logps": -86.32205963134766, "loss": 77.8925, "objective": 74.42088317871094, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5666666626930237, "regularize": 0.1866002231836319, "step": 450, "wo_beta": 14.815221786499023 }, { "epoch": 1.2753897024090695, "eval_dpo_loss": 0.679172158241272, "eval_logits": -1.4745829105377197, "eval_logps": -92.70618438720703, "eval_loss": 179.24407958984375, "eval_objective": 175.84751892089844, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.39890119433403015, "eval_runtime": 369.0794, "eval_samples_per_second": 15.688, "eval_steps_per_second": 1.309, "eval_wo_beta": 16.526927947998047, "step": 450 }, { "dpo_loss": 0.5642288327217102, "epoch": 1.2895606991025035, "grad_norm": 15540.821203538826, "learning_rate": 3.511804074514468e-07, "logits": -1.3849934339523315, "logps": -86.52748107910156, "loss": 78.8058, "objective": 79.82011413574219, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.512499988079071, "regularize": 0.18714289367198944, "step": 455, "wo_beta": 15.67411994934082 }, { "dpo_loss": 0.5722388625144958, "epoch": 1.3037316957959377, "grad_norm": 17894.656202069662, "learning_rate": 3.4738675173325007e-07, "logits": -1.5175042152404785, "logps": -86.22012329101562, "loss": 80.887, "objective": 79.5283432006836, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5291666388511658, "regularize": 0.18461202085018158, "step": 460, "wo_beta": 16.33133316040039 }, { "dpo_loss": 0.5584803819656372, "epoch": 1.3179026924893718, "grad_norm": 15804.388771620323, "learning_rate": 3.4356647144953003e-07, "logits": -1.4979623556137085, "logps": -84.8365478515625, "loss": 78.4995, "objective": 80.0457763671875, "ranking_idealized": 0.5541666746139526, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5291666388511658, "regularize": 0.19293205440044403, "step": 465, "wo_beta": 14.855480194091797 }, { "dpo_loss": 0.5635024309158325, "epoch": 1.3320736891828058, "grad_norm": 15763.736551798547, "learning_rate": 3.3972061102677124e-07, "logits": -1.5794017314910889, "logps": -83.71866607666016, "loss": 80.6678, "objective": 83.6802749633789, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5916666388511658, "regularize": 0.18187165260314941, "step": 470, "wo_beta": 14.902677536010742 }, { "dpo_loss": 0.5684855580329895, "epoch": 1.34624468587624, "grad_norm": 16747.760130279752, "learning_rate": 3.3585022188481246e-07, "logits": -1.45767343044281, "logps": -85.98019409179688, "loss": 70.8929, "objective": 75.84042358398438, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6208333373069763, "regularize": 0.19827060401439667, "step": 475, "wo_beta": 16.622520446777344 }, { "dpo_loss": 0.5594444274902344, "epoch": 1.360415682569674, "grad_norm": 15233.336787247383, "learning_rate": 3.3195636214939935e-07, "logits": -1.5256932973861694, "logps": -86.3495101928711, "loss": 72.525, "objective": 74.22408294677734, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5416666865348816, "regularize": 0.1702665388584137, "step": 480, "wo_beta": 17.96377182006836 }, { "dpo_loss": 0.5718420743942261, "epoch": 1.3745866792631083, "grad_norm": 16631.30941131418, "learning_rate": 3.2804009636290396e-07, "logits": -1.5204293727874756, "logps": -84.49634552001953, "loss": 76.2635, "objective": 74.72454071044922, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5375000238418579, "regularize": 0.18596205115318298, "step": 485, "wo_beta": 16.379438400268555 }, { "dpo_loss": 0.5699793696403503, "epoch": 1.3887576759565423, "grad_norm": 18558.655175644297, "learning_rate": 3.241024951932884e-07, "logits": -1.4783555269241333, "logps": -86.9972915649414, "loss": 75.5371, "objective": 76.1517105102539, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5666666626930237, "regularize": 0.18451951444149017, "step": 490, "wo_beta": 15.841702461242676 }, { "dpo_loss": 0.5529130101203918, "epoch": 1.4029286726499763, "grad_norm": 20634.943800946956, "learning_rate": 3.201446351413958e-07, "logits": -1.4992899894714355, "logps": -87.463623046875, "loss": 79.5849, "objective": 71.89769744873047, "ranking_idealized": 0.5583333373069763, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5416666865348816, "regularize": 0.17148242890834808, "step": 495, "wo_beta": 15.643444061279297 }, { "dpo_loss": 0.5567610859870911, "epoch": 1.4170996693434104, "grad_norm": 17182.106354079515, "learning_rate": 3.161675982466454e-07, "logits": -1.4364333152770996, "logps": -86.2386703491211, "loss": 73.5844, "objective": 70.71806335449219, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.5625, "regularize": 0.16988083720207214, "step": 500, "wo_beta": 15.247078895568848 }, { "epoch": 1.4170996693434104, "eval_dpo_loss": 0.6786399483680725, "eval_logits": -1.4849432706832886, "eval_logps": -93.26954650878906, "eval_loss": 180.36428833007812, "eval_objective": 176.233154296875, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5305383205413818, "eval_regularize": 0.3994362950325012, "eval_runtime": 373.0338, "eval_samples_per_second": 15.521, "eval_steps_per_second": 1.295, "eval_wo_beta": 16.500259399414062, "step": 500 }, { "dpo_loss": 0.5469445586204529, "epoch": 1.4312706660368446, "grad_norm": 16377.98808062248, "learning_rate": 3.121724717912138e-07, "logits": -1.507896900177002, "logps": -86.27371978759766, "loss": 81.0237, "objective": 78.97586822509766, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5916666388511658, "regularize": 0.17829085886478424, "step": 505, "wo_beta": 16.57958984375 }, { "dpo_loss": 0.5666388273239136, "epoch": 1.4454416627302786, "grad_norm": 16987.7779441374, "learning_rate": 3.081603480027826e-07, "logits": -1.5229469537734985, "logps": -86.77935028076172, "loss": 72.4223, "objective": 68.7424087524414, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.16074414551258087, "step": 510, "wo_beta": 15.368119239807129 }, { "dpo_loss": 0.5596444606781006, "epoch": 1.4596126594237129, "grad_norm": 18192.200248438854, "learning_rate": 3.0413232375593494e-07, "logits": -1.581657886505127, "logps": -87.4643783569336, "loss": 75.4207, "objective": 79.91267395019531, "ranking_idealized": 0.550000011920929, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5249999761581421, "regularize": 0.19809895753860474, "step": 515, "wo_beta": 16.213024139404297 }, { "dpo_loss": 0.5609327554702759, "epoch": 1.473783656117147, "grad_norm": 17307.001345557863, "learning_rate": 3.000895002722803e-07, "logits": -1.472069501876831, "logps": -84.53739929199219, "loss": 73.445, "objective": 75.21688079833984, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5333333611488342, "regularize": 0.17492397129535675, "step": 520, "wo_beta": 15.897907257080078 }, { "dpo_loss": 0.5437408685684204, "epoch": 1.487954652810581, "grad_norm": 16071.775164367858, "learning_rate": 2.960329828193918e-07, "logits": -1.3647209405899048, "logps": -83.83393859863281, "loss": 80.3729, "objective": 83.74950408935547, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5708333253860474, "regularize": 0.1921227127313614, "step": 525, "wo_beta": 15.854673385620117 }, { "dpo_loss": 0.5658655762672424, "epoch": 1.5021256495040152, "grad_norm": 16145.860622049926, "learning_rate": 2.919638804086369e-07, "logits": -1.5306588411331177, "logps": -86.52985382080078, "loss": 72.6051, "objective": 74.99629211425781, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5583333373069763, "regularize": 0.16594384610652924, "step": 530, "wo_beta": 15.360599517822266 }, { "dpo_loss": 0.5535832643508911, "epoch": 1.5162966461974492, "grad_norm": 16070.421212133448, "learning_rate": 2.878833054919851e-07, "logits": -1.5020090341567993, "logps": -85.10139465332031, "loss": 76.8492, "objective": 71.94368743896484, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5708333253860474, "regularize": 0.17868037521839142, "step": 535, "wo_beta": 15.670897483825684 }, { "dpo_loss": 0.5716097354888916, "epoch": 1.5304676428908834, "grad_norm": 17086.3623131816, "learning_rate": 2.8379237365787425e-07, "logits": -1.4154467582702637, "logps": -85.44342803955078, "loss": 72.6152, "objective": 71.88426208496094, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5833333134651184, "regularize": 0.16814741492271423, "step": 540, "wo_beta": 15.268050193786621 }, { "dpo_loss": 0.5665360689163208, "epoch": 1.5446386395843175, "grad_norm": 15014.903555178817, "learning_rate": 2.7969220332622e-07, "logits": -1.5325461626052856, "logps": -86.13166046142578, "loss": 71.8279, "objective": 66.47465515136719, "ranking_idealized": 0.6583333611488342, "ranking_idealized_expo": 0.5874999761581421, "ranking_simple": 0.6333333253860474, "regularize": 0.15991279482841492, "step": 545, "wo_beta": 15.164175033569336 }, { "dpo_loss": 0.5597227811813354, "epoch": 1.5588096362777515, "grad_norm": 15652.670268373007, "learning_rate": 2.7558391544265126e-07, "logits": -1.5225752592086792, "logps": -86.28731536865234, "loss": 74.752, "objective": 75.39783477783203, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.46666666865348816, "ranking_simple": 0.5249999761581421, "regularize": 0.1811082661151886, "step": 550, "wo_beta": 15.459908485412598 }, { "epoch": 1.5588096362777515, "eval_dpo_loss": 0.6795263886451721, "eval_logits": -1.4832454919815063, "eval_logps": -92.88924407958984, "eval_loss": 181.36459350585938, "eval_objective": 177.22666931152344, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.40197721123695374, "eval_runtime": 369.092, "eval_samples_per_second": 15.687, "eval_steps_per_second": 1.309, "eval_wo_beta": 16.55462646484375, "step": 550 }, { "dpo_loss": 0.5626943707466125, "epoch": 1.5729806329711855, "grad_norm": 18195.881077955793, "learning_rate": 2.7146863317205425e-07, "logits": -1.3379462957382202, "logps": -86.09709930419922, "loss": 77.0693, "objective": 73.5696792602539, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.1626834124326706, "step": 555, "wo_beta": 14.696831703186035 }, { "dpo_loss": 0.552921712398529, "epoch": 1.5871516296646198, "grad_norm": 17118.412791156054, "learning_rate": 2.67347481591511e-07, "logits": -1.4245628118515015, "logps": -84.61250305175781, "loss": 71.0964, "objective": 72.3261489868164, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5833333134651184, "regularize": 0.1769203245639801, "step": 560, "wo_beta": 16.116395950317383 }, { "dpo_loss": 0.5479990243911743, "epoch": 1.601322626358054, "grad_norm": 17861.734640053382, "learning_rate": 2.6322158738271414e-07, "logits": -1.3948200941085815, "logps": -85.63233184814453, "loss": 69.8501, "objective": 66.04268646240234, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.44999998807907104, "ranking_simple": 0.5333333611488342, "regularize": 0.1456415057182312, "step": 565, "wo_beta": 15.080788612365723 }, { "dpo_loss": 0.5674367547035217, "epoch": 1.615493623051488, "grad_norm": 17710.317360917932, "learning_rate": 2.590920785239436e-07, "logits": -1.5569151639938354, "logps": -86.57015228271484, "loss": 67.9067, "objective": 68.46839904785156, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5708333253860474, "regularize": 0.16210322082042694, "step": 570, "wo_beta": 15.696702003479004 }, { "dpo_loss": 0.5638484358787537, "epoch": 1.629664619744922, "grad_norm": 18073.751646088458, "learning_rate": 2.549600839816884e-07, "logits": -1.5135074853897095, "logps": -84.736328125, "loss": 72.0534, "objective": 84.00183868408203, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6041666865348816, "regularize": 0.1912422776222229, "step": 575, "wo_beta": 16.294113159179688 }, { "dpo_loss": 0.5543667078018188, "epoch": 1.643835616438356, "grad_norm": 16118.144699864974, "learning_rate": 2.508267334019988e-07, "logits": -1.5415210723876953, "logps": -84.3241195678711, "loss": 67.4175, "objective": 66.42247009277344, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5833333134651184, "regularize": 0.1573016196489334, "step": 580, "wo_beta": 14.898622512817383 }, { "dpo_loss": 0.5663701295852661, "epoch": 1.6580066131317903, "grad_norm": 17666.121211791866, "learning_rate": 2.4669315680165195e-07, "logits": -1.3956176042556763, "logps": -84.12041473388672, "loss": 66.3727, "objective": 61.66792678833008, "ranking_idealized": 0.5333333611488342, "ranking_idealized_expo": 0.4625000059604645, "ranking_simple": 0.5083333253860474, "regularize": 0.15171830356121063, "step": 585, "wo_beta": 15.958919525146484 }, { "dpo_loss": 0.558569610118866, "epoch": 1.6721776098252243, "grad_norm": 16850.29970069498, "learning_rate": 2.425604842592169e-07, "logits": -1.4625413417816162, "logps": -84.04219055175781, "loss": 74.8738, "objective": 81.01467895507812, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5874999761581421, "regularize": 0.18167801201343536, "step": 590, "wo_beta": 13.437705039978027 }, { "dpo_loss": 0.5554817914962769, "epoch": 1.6863486065186586, "grad_norm": 17084.24865418485, "learning_rate": 2.384298456061022e-07, "logits": -1.4085568189620972, "logps": -84.96867370605469, "loss": 69.8306, "objective": 80.21109008789062, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5833333134651184, "regularize": 0.18683354556560516, "step": 595, "wo_beta": 17.37420654296875 }, { "dpo_loss": 0.5489537119865417, "epoch": 1.7005196032120926, "grad_norm": 15218.152775914608, "learning_rate": 2.3430237011767164e-07, "logits": -1.4603925943374634, "logps": -85.71869659423828, "loss": 66.606, "objective": 76.13477325439453, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6166666746139526, "regularize": 0.17256483435630798, "step": 600, "wo_beta": 16.145771026611328 }, { "epoch": 1.7005196032120926, "eval_dpo_loss": 0.6788755655288696, "eval_logits": -1.4675469398498535, "eval_logps": -91.61579895019531, "eval_loss": 179.49525451660156, "eval_objective": 176.2792510986328, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5310559272766113, "eval_regularize": 0.3999297320842743, "eval_runtime": 368.9049, "eval_samples_per_second": 15.695, "eval_steps_per_second": 1.309, "eval_wo_beta": 16.6182804107666, "step": 600 }, { "dpo_loss": 0.564985454082489, "epoch": 1.7146905999055266, "grad_norm": 16714.450551200658, "learning_rate": 2.30179186204511e-07, "logits": -1.4053993225097656, "logps": -86.0546646118164, "loss": 69.0228, "objective": 73.44601440429688, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5666666626930237, "regularize": 0.17200763523578644, "step": 605, "wo_beta": 18.306556701660156 }, { "dpo_loss": 0.5740829706192017, "epoch": 1.7288615965989607, "grad_norm": 18689.625067588473, "learning_rate": 2.2606142110393245e-07, "logits": -1.4901000261306763, "logps": -84.50035858154297, "loss": 69.9038, "objective": 70.67005920410156, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.625, "regularize": 0.1648392379283905, "step": 610, "wo_beta": 15.994599342346191 }, { "dpo_loss": 0.5698901414871216, "epoch": 1.743032593292395, "grad_norm": 17284.824092388248, "learning_rate": 2.2195020057179894e-07, "logits": -1.4990768432617188, "logps": -84.69489288330078, "loss": 69.1858, "objective": 62.69770431518555, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5791666507720947, "regularize": 0.1527046412229538, "step": 615, "wo_beta": 15.638397216796875 }, { "dpo_loss": 0.553841233253479, "epoch": 1.7572035899858292, "grad_norm": 16362.729415329737, "learning_rate": 2.1784664857475352e-07, "logits": -1.54779851436615, "logps": -84.9485092163086, "loss": 69.826, "objective": 70.27579498291016, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5833333134651184, "regularize": 0.1771712750196457, "step": 620, "wo_beta": 16.527475357055664 }, { "dpo_loss": 0.5640650987625122, "epoch": 1.7713745866792632, "grad_norm": 16519.00259049995, "learning_rate": 2.1375188698293854e-07, "logits": -1.456007480621338, "logps": -83.91544342041016, "loss": 67.7564, "objective": 73.47472381591797, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5791666507720947, "regularize": 0.16373471915721893, "step": 625, "wo_beta": 15.541656494140625 }, { "dpo_loss": 0.5638567805290222, "epoch": 1.7855455833726972, "grad_norm": 16003.013277681921, "learning_rate": 2.0966703526328726e-07, "logits": -1.4914802312850952, "logps": -85.642822265625, "loss": 68.6768, "objective": 65.9126205444336, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5541666746139526, "regularize": 0.15695922076702118, "step": 630, "wo_beta": 16.262632369995117 }, { "dpo_loss": 0.5648781657218933, "epoch": 1.7997165800661312, "grad_norm": 17168.862791414114, "learning_rate": 2.0559321017347282e-07, "logits": -1.5868287086486816, "logps": -84.44722747802734, "loss": 63.1038, "objective": 59.76757049560547, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5916666388511658, "regularize": 0.15224316716194153, "step": 635, "wo_beta": 17.01032829284668 }, { "dpo_loss": 0.5457783341407776, "epoch": 1.8138875767595655, "grad_norm": 17993.04524523219, "learning_rate": 2.0153152545659796e-07, "logits": -1.5063692331314087, "logps": -85.88375091552734, "loss": 66.3807, "objective": 64.99014282226562, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.550000011920929, "regularize": 0.15310163795948029, "step": 640, "wo_beta": 17.52164649963379 }, { "dpo_loss": 0.5509151220321655, "epoch": 1.8280585734529995, "grad_norm": 19625.243857455473, "learning_rate": 1.9748309153670856e-07, "logits": -1.5516611337661743, "logps": -86.286376953125, "loss": 65.9614, "objective": 64.1622543334961, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6333333253860474, "regularize": 0.1483081430196762, "step": 645, "wo_beta": 16.739274978637695 }, { "dpo_loss": 0.5534842014312744, "epoch": 1.8422295701464337, "grad_norm": 17668.158898193855, "learning_rate": 1.9344901521521498e-07, "logits": -1.6079561710357666, "logps": -86.36868286132812, "loss": 65.4503, "objective": 62.571048736572266, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.14387734234333038, "step": 650, "wo_beta": 15.527718544006348 }, { "epoch": 1.8422295701464337, "eval_dpo_loss": 0.6790141463279724, "eval_logits": -1.504582166671753, "eval_logps": -91.89739990234375, "eval_loss": 180.12484741210938, "eval_objective": 176.55528259277344, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5284678936004639, "eval_regularize": 0.4002520442008972, "eval_runtime": 372.2243, "eval_samples_per_second": 15.555, "eval_steps_per_second": 1.298, "eval_wo_beta": 16.537281036376953, "step": 650 }, { "dpo_loss": 0.5599731206893921, "epoch": 1.8564005668398678, "grad_norm": 18359.19761584605, "learning_rate": 1.8943039936830344e-07, "logits": -1.4786803722381592, "logps": -83.48601531982422, "loss": 69.3454, "objective": 74.32158660888672, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5583333373069763, "regularize": 0.1768590807914734, "step": 655, "wo_beta": 15.510722160339355 }, { "dpo_loss": 0.5625216960906982, "epoch": 1.8705715635333018, "grad_norm": 16101.25212298875, "learning_rate": 1.854283426454209e-07, "logits": -1.545279622077942, "logps": -86.23809051513672, "loss": 65.0768, "objective": 58.69490432739258, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.574999988079071, "regularize": 0.14343543350696564, "step": 660, "wo_beta": 17.122419357299805 }, { "dpo_loss": 0.5581023097038269, "epoch": 1.8847425602267358, "grad_norm": 16788.328924690355, "learning_rate": 1.8144393916891508e-07, "logits": -1.526328206062317, "logps": -85.24169921875, "loss": 63.7541, "objective": 67.25133514404297, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6208333373069763, "regularize": 0.14928290247917175, "step": 665, "wo_beta": 15.62762451171875 }, { "dpo_loss": 0.553265392780304, "epoch": 1.89891355692017, "grad_norm": 18214.847289885216, "learning_rate": 1.7747827823491252e-07, "logits": -1.4548065662384033, "logps": -83.87257385253906, "loss": 57.5593, "objective": 56.684471130371094, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5333333611488342, "regularize": 0.14734616875648499, "step": 670, "wo_beta": 16.462730407714844 }, { "dpo_loss": 0.5662250518798828, "epoch": 1.9130845536136043, "grad_norm": 16267.920642791076, "learning_rate": 1.7353244401551565e-07, "logits": -1.478503704071045, "logps": -84.67176818847656, "loss": 60.0148, "objective": 62.5386962890625, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.574999988079071, "regularize": 0.14488257467746735, "step": 675, "wo_beta": 15.750804901123047 }, { "dpo_loss": 0.5515182018280029, "epoch": 1.9272555503070383, "grad_norm": 17294.380974269086, "learning_rate": 1.6960751526240118e-07, "logits": -1.5540010929107666, "logps": -86.8657455444336, "loss": 60.2369, "objective": 55.505897521972656, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5, "ranking_simple": 0.6041666865348816, "regularize": 0.13418342173099518, "step": 680, "wo_beta": 15.37994384765625 }, { "dpo_loss": 0.5534944534301758, "epoch": 1.9414265470004723, "grad_norm": 17684.725981514333, "learning_rate": 1.6570456501189994e-07, "logits": -1.4706988334655762, "logps": -84.12257385253906, "loss": 60.967, "objective": 55.13488006591797, "ranking_idealized": 0.5291666388511658, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5291666388511658, "regularize": 0.13964900374412537, "step": 685, "wo_beta": 15.740229606628418 }, { "dpo_loss": 0.5616536736488342, "epoch": 1.9555975436939064, "grad_norm": 17181.452666650763, "learning_rate": 1.618246602916397e-07, "logits": -1.547702670097351, "logps": -84.80847930908203, "loss": 56.3999, "objective": 58.78403854370117, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.5916666388511658, "regularize": 0.13416926562786102, "step": 690, "wo_beta": 16.004222869873047 }, { "dpo_loss": 0.5623855590820312, "epoch": 1.9697685403873406, "grad_norm": 15698.603949750828, "learning_rate": 1.579688618288305e-07, "logits": -1.4090545177459717, "logps": -85.53604888916016, "loss": 57.3939, "objective": 54.246089935302734, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.550000011920929, "regularize": 0.12334556132555008, "step": 695, "wo_beta": 16.506813049316406 }, { "dpo_loss": 0.5531891584396362, "epoch": 1.9839395370807746, "grad_norm": 17232.305142451998, "learning_rate": 1.541382237602721e-07, "logits": -1.3992184400558472, "logps": -86.07640075683594, "loss": 62.3615, "objective": 63.35702896118164, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5708333253860474, "regularize": 0.14704561233520508, "step": 700, "wo_beta": 15.364715576171875 }, { "epoch": 1.9839395370807746, "eval_dpo_loss": 0.678434431552887, "eval_logits": -1.4984484910964966, "eval_logps": -91.58750915527344, "eval_loss": 179.3856658935547, "eval_objective": 176.00213623046875, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5300207138061523, "eval_regularize": 0.3992062211036682, "eval_runtime": 368.882, "eval_samples_per_second": 15.696, "eval_steps_per_second": 1.309, "eval_wo_beta": 16.5863037109375, "step": 700 }, { "dpo_loss": 0.5491320490837097, "epoch": 1.9981105337742089, "grad_norm": 18325.752866553015, "learning_rate": 1.5033379334416375e-07, "logits": -1.3390460014343262, "logps": -84.89611053466797, "loss": 62.0311, "objective": 66.05867767333984, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5583333373069763, "regularize": 0.15552859008312225, "step": 705, "wo_beta": 17.203296661376953 }, { "dpo_loss": 0.5555277466773987, "epoch": 2.012281530467643, "grad_norm": 16395.524810634437, "learning_rate": 1.465566106737942e-07, "logits": -1.4830571413040161, "logps": -84.74015045166016, "loss": 56.8191, "objective": 61.21573257446289, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.6041666865348816, "regularize": 0.1427960991859436, "step": 710, "wo_beta": 15.91169261932373 }, { "dpo_loss": 0.5421245098114014, "epoch": 2.026452527161077, "grad_norm": 17457.779859055492, "learning_rate": 1.428077083931907e-07, "logits": -1.5156207084655762, "logps": -84.15880584716797, "loss": 51.1684, "objective": 52.4562873840332, "ranking_idealized": 0.6791666746139526, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6333333253860474, "regularize": 0.12656886875629425, "step": 715, "wo_beta": 15.599425315856934 }, { "dpo_loss": 0.5437305569648743, "epoch": 2.040623523854511, "grad_norm": 15758.315666253699, "learning_rate": 1.3908811141480406e-07, "logits": -1.4699770212173462, "logps": -84.7515869140625, "loss": 45.1211, "objective": 38.25383758544922, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5958333611488342, "regularize": 0.10041950643062592, "step": 720, "wo_beta": 16.25193214416504 }, { "dpo_loss": 0.545520544052124, "epoch": 2.0547945205479454, "grad_norm": 16201.394561682804, "learning_rate": 1.353988366393083e-07, "logits": -1.5007617473602295, "logps": -86.95757293701172, "loss": 53.076, "objective": 52.1579704284668, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5791666507720947, "regularize": 0.12382372468709946, "step": 725, "wo_beta": 14.748201370239258 }, { "dpo_loss": 0.5444363951683044, "epoch": 2.0689655172413794, "grad_norm": 18420.501731961805, "learning_rate": 1.3174089267758982e-07, "logits": -1.57591712474823, "logps": -84.48290252685547, "loss": 52.1829, "objective": 51.44971466064453, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5791666507720947, "regularize": 0.12323012948036194, "step": 730, "wo_beta": 15.643149375915527 }, { "dpo_loss": 0.5511536002159119, "epoch": 2.0831365139348135, "grad_norm": 15885.917813358059, "learning_rate": 1.2811527957500343e-07, "logits": -1.499257206916809, "logps": -84.50511169433594, "loss": 44.0743, "objective": 49.55210876464844, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5333333611488342, "regularize": 0.13047467172145844, "step": 735, "wo_beta": 17.085899353027344 }, { "dpo_loss": 0.5526978969573975, "epoch": 2.0973075106282475, "grad_norm": 16073.187372319342, "learning_rate": 1.245229885379699e-07, "logits": -1.5387953519821167, "logps": -84.2315444946289, "loss": 48.6885, "objective": 53.333866119384766, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5958333611488342, "regularize": 0.12818403542041779, "step": 740, "wo_beta": 15.79825496673584 }, { "dpo_loss": 0.5457006692886353, "epoch": 2.1114785073216815, "grad_norm": 18439.02982973444, "learning_rate": 1.209650016629899e-07, "logits": -1.4960881471633911, "logps": -84.55073547363281, "loss": 49.7231, "objective": 46.95569610595703, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5583333373069763, "ranking_simple": 0.6291666626930237, "regularize": 0.11624018102884293, "step": 745, "wo_beta": 15.793415069580078 }, { "dpo_loss": 0.5496144890785217, "epoch": 2.1256495040151155, "grad_norm": 16557.264742662323, "learning_rate": 1.1744229166814886e-07, "logits": -1.511896014213562, "logps": -83.8011245727539, "loss": 48.9708, "objective": 45.016258239746094, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.550000011920929, "regularize": 0.11198277026414871, "step": 750, "wo_beta": 15.621644020080566 }, { "epoch": 2.1256495040151155, "eval_dpo_loss": 0.6794183254241943, "eval_logits": -1.491926908493042, "eval_logps": -92.19331359863281, "eval_loss": 179.8103485107422, "eval_objective": 176.70277404785156, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.4011208415031433, "eval_runtime": 369.337, "eval_samples_per_second": 15.677, "eval_steps_per_second": 1.308, "eval_wo_beta": 16.588436126708984, "step": 750 }, { "dpo_loss": 0.5448576807975769, "epoch": 2.13982050070855, "grad_norm": 17193.17890323298, "learning_rate": 1.1395582162718523e-07, "logits": -1.4843658208847046, "logps": -86.97160339355469, "loss": 52.8805, "objective": 47.13671112060547, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5833333134651184, "regularize": 0.11303378641605377, "step": 755, "wo_beta": 16.85382080078125 }, { "dpo_loss": 0.5693633556365967, "epoch": 2.153991497401984, "grad_norm": 16501.093616184942, "learning_rate": 1.10506544706196e-07, "logits": -1.4409741163253784, "logps": -83.32089233398438, "loss": 48.6534, "objective": 42.576026916503906, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5791666507720947, "regularize": 0.10865426063537598, "step": 760, "wo_beta": 14.651175498962402 }, { "dpo_loss": 0.550613522529602, "epoch": 2.168162494095418, "grad_norm": 17100.049348037002, "learning_rate": 1.0709540390305061e-07, "logits": -1.4873898029327393, "logps": -84.55794525146484, "loss": 48.0396, "objective": 42.508872985839844, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.5958333611488342, "regularize": 0.10587514191865921, "step": 765, "wo_beta": 16.372541427612305 }, { "dpo_loss": 0.5513295531272888, "epoch": 2.182333490788852, "grad_norm": 16090.026254449373, "learning_rate": 1.0372333178958462e-07, "logits": -1.5015202760696411, "logps": -84.9146499633789, "loss": 48.0411, "objective": 50.020198822021484, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5708333253860474, "regularize": 0.12123490124940872, "step": 770, "wo_beta": 14.4235200881958 }, { "dpo_loss": 0.5526517629623413, "epoch": 2.196504487482286, "grad_norm": 16092.050129989755, "learning_rate": 1.0039125025664391e-07, "logits": -1.4631909132003784, "logps": -86.0343017578125, "loss": 50.4495, "objective": 48.98077392578125, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.5791666507720947, "ranking_simple": 0.6166666746139526, "regularize": 0.12355396151542664, "step": 775, "wo_beta": 17.455713272094727 }, { "dpo_loss": 0.5543543100357056, "epoch": 2.21067548417572, "grad_norm": 17672.341083343217, "learning_rate": 9.710007026204894e-08, "logits": -1.4037829637527466, "logps": -86.01419067382812, "loss": 48.6758, "objective": 49.57035446166992, "ranking_idealized": 0.512499988079071, "ranking_idealized_expo": 0.44583332538604736, "ranking_simple": 0.48750001192092896, "regularize": 0.1090199202299118, "step": 780, "wo_beta": 15.31540298461914 }, { "dpo_loss": 0.5374571681022644, "epoch": 2.2248464808691546, "grad_norm": 17104.605257535815, "learning_rate": 9.385069158154805e-08, "logits": -1.4150718450546265, "logps": -85.85627746582031, "loss": 47.7928, "objective": 41.365787506103516, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5541666746139526, "regularize": 0.10406169295310974, "step": 785, "wo_beta": 15.260098457336426 }, { "dpo_loss": 0.5317620635032654, "epoch": 2.2390174775625886, "grad_norm": 16507.88081841276, "learning_rate": 9.064400256282755e-08, "logits": -1.399611234664917, "logps": -86.44268035888672, "loss": 52.3325, "objective": 45.95266342163086, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5625, "ranking_simple": 0.637499988079071, "regularize": 0.1156383752822876, "step": 790, "wo_beta": 15.514897346496582 }, { "dpo_loss": 0.5416150689125061, "epoch": 2.2531884742560226, "grad_norm": 14862.627085415834, "learning_rate": 8.748087988264668e-08, "logits": -1.4897602796554565, "logps": -87.291259765625, "loss": 50.1301, "objective": 47.57838439941406, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5874999761581421, "regularize": 0.12325119227170944, "step": 795, "wo_beta": 14.180596351623535 }, { "dpo_loss": 0.5349844694137573, "epoch": 2.2673594709494567, "grad_norm": 16155.361379646765, "learning_rate": 8.436218830716258e-08, "logits": -1.5085468292236328, "logps": -86.47720336914062, "loss": 51.9463, "objective": 60.657657623291016, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5041666626930237, "ranking_simple": 0.5916666388511658, "regularize": 0.13889312744140625, "step": 800, "wo_beta": 14.652113914489746 }, { "epoch": 2.2673594709494567, "eval_dpo_loss": 0.6781792044639587, "eval_logits": -1.4993284940719604, "eval_logps": -92.00647735595703, "eval_loss": 179.21780395507812, "eval_objective": 175.7035675048828, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5289855003356934, "eval_regularize": 0.39864060282707214, "eval_runtime": 370.6492, "eval_samples_per_second": 15.621, "eval_steps_per_second": 1.303, "eval_wo_beta": 16.56892204284668, "step": 800 }, { "dpo_loss": 0.5452725291252136, "epoch": 2.2815304676428907, "grad_norm": 16570.72383235664, "learning_rate": 8.1288780455512e-08, "logits": -1.5190993547439575, "logps": -84.90750122070312, "loss": 45.9105, "objective": 44.02467346191406, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5666666626930237, "regularize": 0.10332147032022476, "step": 805, "wo_beta": 16.12928581237793 }, { "dpo_loss": 0.5472940802574158, "epoch": 2.295701464336325, "grad_norm": 17306.744795453895, "learning_rate": 7.826149656671385e-08, "logits": -1.6159324645996094, "logps": -84.2494888305664, "loss": 48.3513, "objective": 51.42966079711914, "ranking_idealized": 0.6458333134651184, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.637499988079071, "regularize": 0.1283574104309082, "step": 810, "wo_beta": 14.84490966796875 }, { "dpo_loss": 0.5408957004547119, "epoch": 2.309872461029759, "grad_norm": 17714.10311614495, "learning_rate": 7.528116426995604e-08, "logits": -1.5414897203445435, "logps": -86.03276824951172, "loss": 44.4155, "objective": 42.15602493286133, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.47083333134651184, "ranking_simple": 0.5625, "regularize": 0.09716067463159561, "step": 815, "wo_beta": 15.411224365234375 }, { "dpo_loss": 0.5434551239013672, "epoch": 2.324043457723193, "grad_norm": 16657.377741221597, "learning_rate": 7.234859835833021e-08, "logits": -1.4976943731307983, "logps": -85.22306060791016, "loss": 44.4464, "objective": 42.74842834472656, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.574999988079071, "regularize": 0.1011769101023674, "step": 820, "wo_beta": 15.093592643737793 }, { "dpo_loss": 0.5428169965744019, "epoch": 2.3382144544166272, "grad_norm": 16447.115430947913, "learning_rate": 6.94646005660749e-08, "logits": -1.5226491689682007, "logps": -84.4798812866211, "loss": 45.0833, "objective": 45.554691314697266, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.612500011920929, "regularize": 0.11467791348695755, "step": 825, "wo_beta": 16.547521591186523 }, { "dpo_loss": 0.5347627997398376, "epoch": 2.3523854511100613, "grad_norm": 16335.000287824352, "learning_rate": 6.662995934939006e-08, "logits": -1.5204423666000366, "logps": -86.52505493164062, "loss": 49.1483, "objective": 49.5906867980957, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.6166666746139526, "regularize": 0.11676025390625, "step": 830, "wo_beta": 16.500276565551758 }, { "dpo_loss": 0.5479462742805481, "epoch": 2.3665564478034957, "grad_norm": 16959.398846741216, "learning_rate": 6.384544967088063e-08, "logits": -1.462269902229309, "logps": -86.40924835205078, "loss": 48.4639, "objective": 45.90879821777344, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.5958333611488342, "regularize": 0.11642123758792877, "step": 835, "wo_beta": 15.002260208129883 }, { "dpo_loss": 0.5614480376243591, "epoch": 2.3807274444969297, "grad_norm": 15851.156516804678, "learning_rate": 6.111183278768955e-08, "logits": -1.4380650520324707, "logps": -86.09837341308594, "loss": 40.9965, "objective": 37.455379486083984, "ranking_idealized": 0.5249999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5, "regularize": 0.0973404198884964, "step": 840, "wo_beta": 15.861971855163574 }, { "dpo_loss": 0.5543637871742249, "epoch": 2.3948984411903638, "grad_norm": 19702.171994535533, "learning_rate": 5.842985604337769e-08, "logits": -1.4723432064056396, "logps": -87.03990173339844, "loss": 47.1256, "objective": 46.43952941894531, "ranking_idealized": 0.5958333611488342, "ranking_idealized_expo": 0.5416666865348816, "ranking_simple": 0.5833333134651184, "regularize": 0.11250331997871399, "step": 845, "wo_beta": 16.375986099243164 }, { "dpo_loss": 0.5473025441169739, "epoch": 2.409069437883798, "grad_norm": 16397.021305258124, "learning_rate": 5.5800252663607636e-08, "logits": -1.509826898574829, "logps": -85.1669692993164, "loss": 44.3463, "objective": 44.0050048828125, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5583333373069763, "regularize": 0.10994389653205872, "step": 850, "wo_beta": 15.741286277770996 }, { "epoch": 2.409069437883798, "eval_dpo_loss": 0.6782696843147278, "eval_logits": -1.4917659759521484, "eval_logps": -92.23719787597656, "eval_loss": 179.17352294921875, "eval_objective": 175.77769470214844, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5284678936004639, "eval_regularize": 0.39879217743873596, "eval_runtime": 368.8355, "eval_samples_per_second": 15.698, "eval_steps_per_second": 1.31, "eval_wo_beta": 16.56818962097168, "step": 850 }, { "dpo_loss": 0.5434221625328064, "epoch": 2.423240434577232, "grad_norm": 14761.032913238774, "learning_rate": 5.3223741555686873e-08, "logits": -1.537110447883606, "logps": -84.45201110839844, "loss": 43.1862, "objective": 41.27051544189453, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5291666388511658, "ranking_simple": 0.5833333134651184, "regularize": 0.10142151266336441, "step": 855, "wo_beta": 15.314756393432617 }, { "dpo_loss": 0.5529573559761047, "epoch": 2.4374114312706663, "grad_norm": 16145.709635641595, "learning_rate": 5.070102711202606e-08, "logits": -1.4745042324066162, "logps": -85.98120880126953, "loss": 42.9052, "objective": 38.47713088989258, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.574999988079071, "regularize": 0.09888458997011185, "step": 860, "wo_beta": 16.20383644104004 }, { "dpo_loss": 0.551279604434967, "epoch": 2.4515824279641003, "grad_norm": 18278.1439042312, "learning_rate": 4.8232799017564967e-08, "logits": -1.4951705932617188, "logps": -85.17224884033203, "loss": 44.7949, "objective": 45.912452697753906, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.574999988079071, "regularize": 0.11339430510997772, "step": 865, "wo_beta": 16.40469741821289 }, { "dpo_loss": 0.5488017201423645, "epoch": 2.4657534246575343, "grad_norm": 15873.838165857398, "learning_rate": 4.5819732061219475e-08, "logits": -1.5395283699035645, "logps": -85.87442779541016, "loss": 45.7505, "objective": 46.77009963989258, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5208333134651184, "ranking_simple": 0.5833333134651184, "regularize": 0.11627896875143051, "step": 870, "wo_beta": 15.758785247802734 }, { "dpo_loss": 0.5411531329154968, "epoch": 2.4799244213509684, "grad_norm": 16103.251582257983, "learning_rate": 4.346248595140112e-08, "logits": -1.4675084352493286, "logps": -85.36338806152344, "loss": 46.2974, "objective": 47.864070892333984, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5708333253860474, "ranking_simple": 0.6000000238418579, "regularize": 0.11755504459142685, "step": 875, "wo_beta": 17.365110397338867 }, { "dpo_loss": 0.5485495924949646, "epoch": 2.4940954180444024, "grad_norm": 15694.189473425999, "learning_rate": 4.116170513565942e-08, "logits": -1.3954468965530396, "logps": -85.28124237060547, "loss": 41.7812, "objective": 37.13809585571289, "ranking_idealized": 0.5625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5541666746139526, "regularize": 0.0931786298751831, "step": 880, "wo_beta": 17.850427627563477 }, { "dpo_loss": 0.5530834794044495, "epoch": 2.5082664147378364, "grad_norm": 17596.947495882203, "learning_rate": 3.8918018624496286e-08, "logits": -1.562106728553772, "logps": -84.21708679199219, "loss": 45.0999, "objective": 50.288021087646484, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.11681138724088669, "step": 885, "wo_beta": 17.1990909576416 }, { "dpo_loss": 0.5438919067382812, "epoch": 2.5224374114312704, "grad_norm": 17590.89973277564, "learning_rate": 3.673203981940068e-08, "logits": -1.4610990285873413, "logps": -83.57906341552734, "loss": 45.9865, "objective": 43.9241828918457, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5708333253860474, "regularize": 0.11596141010522842, "step": 890, "wo_beta": 14.2067232131958 }, { "dpo_loss": 0.5475446581840515, "epoch": 2.536608408124705, "grad_norm": 16852.292477633127, "learning_rate": 3.46043663451511e-08, "logits": -1.456311583518982, "logps": -85.9644775390625, "loss": 40.1267, "objective": 40.91215133666992, "ranking_idealized": 0.5458333492279053, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5375000238418579, "regularize": 0.0973983108997345, "step": 895, "wo_beta": 16.769834518432617 }, { "dpo_loss": 0.5658264756202698, "epoch": 2.550779404818139, "grad_norm": 16973.16020360833, "learning_rate": 3.2535579886430715e-08, "logits": -1.4089369773864746, "logps": -84.34557342529297, "loss": 44.3015, "objective": 48.17121124267578, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5874999761581421, "regularize": 0.10806681215763092, "step": 900, "wo_beta": 15.534666061401367 }, { "epoch": 2.550779404818139, "eval_dpo_loss": 0.678382933139801, "eval_logits": -1.4982877969741821, "eval_logps": -92.18975830078125, "eval_loss": 179.15904235839844, "eval_objective": 175.8240203857422, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 0.3989744186401367, "eval_runtime": 368.6241, "eval_samples_per_second": 15.707, "eval_steps_per_second": 1.31, "eval_wo_beta": 16.590484619140625, "step": 900 }, { "dpo_loss": 0.5509156584739685, "epoch": 2.564950401511573, "grad_norm": 14234.779776824053, "learning_rate": 3.052624602880063e-08, "logits": -1.5034754276275635, "logps": -84.19306945800781, "loss": 40.4357, "objective": 38.76914978027344, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5458333492279053, "regularize": 0.09935871511697769, "step": 905, "wo_beta": 15.227039337158203 }, { "dpo_loss": 0.5412671566009521, "epoch": 2.579121398205007, "grad_norm": 16823.804354079846, "learning_rate": 2.8576914104074423e-08, "logits": -1.4797313213348389, "logps": -87.3152084350586, "loss": 41.7421, "objective": 45.547401428222656, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.4958333373069763, "ranking_simple": 0.5874999761581421, "regularize": 0.10913080722093582, "step": 910, "wo_beta": 16.365530014038086 }, { "dpo_loss": 0.5502139329910278, "epoch": 2.593292394898441, "grad_norm": 15853.539663724245, "learning_rate": 2.668811704013646e-08, "logits": -1.5921828746795654, "logps": -86.05388641357422, "loss": 42.1486, "objective": 39.99686050415039, "ranking_idealized": 0.6291666626930237, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.6041666865348816, "regularize": 0.103940449655056, "step": 915, "wo_beta": 15.442140579223633 }, { "dpo_loss": 0.5391423106193542, "epoch": 2.6074633915918755, "grad_norm": 16570.329855367927, "learning_rate": 2.486037121524448e-08, "logits": -1.4353820085525513, "logps": -85.45365905761719, "loss": 46.2712, "objective": 45.818546295166016, "ranking_idealized": 0.6041666865348816, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5916666388511658, "regularize": 0.10537134110927582, "step": 920, "wo_beta": 15.817992210388184 }, { "dpo_loss": 0.5430201888084412, "epoch": 2.6216343882853095, "grad_norm": 16860.801218719196, "learning_rate": 2.3094176316856978e-08, "logits": -1.4627550840377808, "logps": -85.5040512084961, "loss": 41.8789, "objective": 39.82048797607422, "ranking_idealized": 0.574999988079071, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5458333492279053, "regularize": 0.10451699048280716, "step": 925, "wo_beta": 16.46214485168457 }, { "dpo_loss": 0.5547136068344116, "epoch": 2.6358053849787435, "grad_norm": 18548.498992469253, "learning_rate": 2.1390015205023898e-08, "logits": -1.4610332250595093, "logps": -85.3515625, "loss": 44.5914, "objective": 45.85152053833008, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.44583332538604736, "ranking_simple": 0.5166666507720947, "regularize": 0.11503276228904724, "step": 930, "wo_beta": 16.94695281982422 }, { "dpo_loss": 0.5648698210716248, "epoch": 2.6499763816721775, "grad_norm": 16823.626472777796, "learning_rate": 1.974835378037723e-08, "logits": -1.4719030857086182, "logps": -84.22066497802734, "loss": 42.8827, "objective": 47.928733825683594, "ranking_idealized": 0.6499999761581421, "ranking_idealized_expo": 0.574999988079071, "ranking_simple": 0.6208333373069763, "regularize": 0.11680851131677628, "step": 935, "wo_beta": 16.337308883666992 }, { "dpo_loss": 0.5417830944061279, "epoch": 2.6641473783656116, "grad_norm": 19648.95461433399, "learning_rate": 1.816964085675865e-08, "logits": -1.50851309299469, "logps": -87.09917449951172, "loss": 44.1259, "objective": 46.947654724121094, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5874999761581421, "regularize": 0.112226702272892, "step": 940, "wo_beta": 17.42079734802246 }, { "dpo_loss": 0.5409041047096252, "epoch": 2.678318375059046, "grad_norm": 17233.04175831224, "learning_rate": 1.6654308038518056e-08, "logits": -1.544434905052185, "logps": -85.57273864746094, "loss": 41.238, "objective": 48.42988967895508, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5583333373069763, "regularize": 0.11696865409612656, "step": 945, "wo_beta": 15.977194786071777 }, { "dpo_loss": 0.554226815700531, "epoch": 2.69248937175248, "grad_norm": 17994.23017373139, "learning_rate": 1.520276960251751e-08, "logits": -1.4273337125778198, "logps": -85.7279281616211, "loss": 43.4164, "objective": 41.65528106689453, "ranking_idealized": 0.637499988079071, "ranking_idealized_expo": 0.5375000238418579, "ranking_simple": 0.6041666865348816, "regularize": 0.1018596738576889, "step": 950, "wo_beta": 15.610276222229004 }, { "epoch": 2.69248937175248, "eval_dpo_loss": 0.6785179376602173, "eval_logits": -1.4967025518417358, "eval_logps": -92.20464324951172, "eval_loss": 179.2801055908203, "eval_objective": 176.04083251953125, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5274327397346497, "eval_regularize": 0.39928528666496277, "eval_runtime": 370.2035, "eval_samples_per_second": 15.64, "eval_steps_per_second": 1.305, "eval_wo_beta": 16.589099884033203, "step": 950 }, { "dpo_loss": 0.5416913628578186, "epoch": 2.706660368445914, "grad_norm": 16913.4446377394, "learning_rate": 1.3815422384871878e-08, "logits": -1.5223019123077393, "logps": -85.46512603759766, "loss": 41.9154, "objective": 39.07767105102539, "ranking_idealized": 0.5874999761581421, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5583333373069763, "regularize": 0.10648568719625473, "step": 955, "wo_beta": 14.056703567504883 }, { "dpo_loss": 0.5538465976715088, "epoch": 2.720831365139348, "grad_norm": 16463.281607975867, "learning_rate": 1.2492645672457836e-08, "logits": -1.4985733032226562, "logps": -85.17173767089844, "loss": 42.6366, "objective": 43.174072265625, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.49166667461395264, "ranking_simple": 0.5708333253860474, "regularize": 0.10360194742679596, "step": 960, "wo_beta": 16.368473052978516 }, { "dpo_loss": 0.537990391254425, "epoch": 2.735002361832782, "grad_norm": 17961.612959662347, "learning_rate": 1.1234801099220786e-08, "logits": -1.509239912033081, "logps": -85.44644165039062, "loss": 39.0678, "objective": 39.20804977416992, "ranking_idealized": 0.6416666507720947, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6291666626930237, "regularize": 0.10068784654140472, "step": 965, "wo_beta": 15.433809280395508 }, { "dpo_loss": 0.5542954802513123, "epoch": 2.7491733585262166, "grad_norm": 15088.900869572226, "learning_rate": 1.004223254730749e-08, "logits": -1.519822597503662, "logps": -86.41224670410156, "loss": 44.4091, "objective": 43.453468322753906, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.5833333134651184, "regularize": 0.11093102395534515, "step": 970, "wo_beta": 15.656749725341797 }, { "dpo_loss": 0.5459226965904236, "epoch": 2.7633443552196506, "grad_norm": 17863.309850864207, "learning_rate": 8.915266053052373e-09, "logits": -1.4553431272506714, "logps": -84.71710968017578, "loss": 44.5817, "objective": 54.85538101196289, "ranking_idealized": 0.6333333253860474, "ranking_idealized_expo": 0.5333333611488342, "ranking_simple": 0.6083333492279053, "regularize": 0.13891781866550446, "step": 975, "wo_beta": 16.4742374420166 }, { "dpo_loss": 0.5517151355743408, "epoch": 2.7775153519130846, "grad_norm": 17532.917025188563, "learning_rate": 7.85420971784223e-09, "logits": -1.5741106271743774, "logps": -84.43816375732422, "loss": 49.1041, "objective": 50.19886016845703, "ranking_idealized": 0.5916666388511658, "ranking_idealized_expo": 0.5, "ranking_simple": 0.550000011920929, "regularize": 0.12199045717716217, "step": 980, "wo_beta": 14.642070770263672 }, { "dpo_loss": 0.5505563020706177, "epoch": 2.7916863486065187, "grad_norm": 15695.994208260572, "learning_rate": 6.859353623884567e-09, "logits": -1.4095691442489624, "logps": -82.93616485595703, "loss": 43.0667, "objective": 50.173011779785156, "ranking_idealized": 0.5791666507720947, "ranking_idealized_expo": 0.48750001192092896, "ranking_simple": 0.5666666626930237, "regularize": 0.1220245286822319, "step": 985, "wo_beta": 14.67066764831543 }, { "dpo_loss": 0.5449987649917603, "epoch": 2.8058573452999527, "grad_norm": 15965.60549784291, "learning_rate": 5.930969754901843e-09, "logits": -1.484297275543213, "logps": -86.44190979003906, "loss": 44.9483, "objective": 41.374794006347656, "ranking_idealized": 0.5666666626930237, "ranking_idealized_expo": 0.4749999940395355, "ranking_simple": 0.5625, "regularize": 0.10469052940607071, "step": 990, "wo_beta": 16.099658966064453 }, { "dpo_loss": 0.5509870648384094, "epoch": 2.820028341993387, "grad_norm": 15636.732404031713, "learning_rate": 5.069311921774039e-09, "logits": -1.531805396080017, "logps": -84.8103256225586, "loss": 43.9212, "objective": 41.92687225341797, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5458333492279053, "ranking_simple": 0.6083333492279053, "regularize": 0.11357491463422775, "step": 995, "wo_beta": 16.878124237060547 }, { "dpo_loss": 0.544861376285553, "epoch": 2.8341993386868207, "grad_norm": 15284.06322793647, "learning_rate": 4.274615693149075e-09, "logits": -1.4928451776504517, "logps": -83.65907287597656, "loss": 43.6009, "objective": 46.88129425048828, "ranking_idealized": 0.6166666746139526, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6000000238418579, "regularize": 0.11148179322481155, "step": 1000, "wo_beta": 15.039312362670898 }, { "epoch": 2.8341993386868207, "eval_dpo_loss": 0.6785008311271667, "eval_logits": -1.4978208541870117, "eval_logps": -92.27050018310547, "eval_loss": 179.2790985107422, "eval_objective": 175.9962921142578, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 0.39921271800994873, "eval_runtime": 373.1717, "eval_samples_per_second": 15.516, "eval_steps_per_second": 1.294, "eval_wo_beta": 16.587968826293945, "step": 1000 }, { "dpo_loss": 0.5515304803848267, "epoch": 2.848370335380255, "grad_norm": 16385.652650772114, "learning_rate": 3.547098331040915e-09, "logits": -1.482871413230896, "logps": -84.16107940673828, "loss": 41.5394, "objective": 43.54771041870117, "ranking_idealized": 0.6000000238418579, "ranking_idealized_expo": 0.512499988079071, "ranking_simple": 0.5833333134651184, "regularize": 0.11037115752696991, "step": 1005, "wo_beta": 16.216983795166016 }, { "dpo_loss": 0.5438867211341858, "epoch": 2.862541332073689, "grad_norm": 16774.77560364275, "learning_rate": 2.886958731432132e-09, "logits": -1.542305827140808, "logps": -84.64833068847656, "loss": 43.5186, "objective": 44.43704605102539, "ranking_idealized": 0.612500011920929, "ranking_idealized_expo": 0.5249999761581421, "ranking_simple": 0.5958333611488342, "regularize": 0.10451284050941467, "step": 1010, "wo_beta": 15.962403297424316 }, { "dpo_loss": 0.5501060485839844, "epoch": 2.8767123287671232, "grad_norm": 17522.630643855955, "learning_rate": 2.294377369897793e-09, "logits": -1.4613019227981567, "logps": -83.6242446899414, "loss": 42.9096, "objective": 43.5313720703125, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6208333373069763, "regularize": 0.1068674772977829, "step": 1015, "wo_beta": 15.568625450134277 }, { "dpo_loss": 0.549178957939148, "epoch": 2.8908833254605573, "grad_norm": 16355.818495658159, "learning_rate": 1.769516252265235e-09, "logits": -1.4256434440612793, "logps": -85.48424530029297, "loss": 41.2726, "objective": 41.4518928527832, "ranking_idealized": 0.5833333134651184, "ranking_idealized_expo": 0.5166666507720947, "ranking_simple": 0.5708333253860474, "regularize": 0.0968986377120018, "step": 1020, "wo_beta": 18.603649139404297 }, { "dpo_loss": 0.5602646470069885, "epoch": 2.9050543221539913, "grad_norm": 15838.312706923856, "learning_rate": 1.3125188703233814e-09, "logits": -1.5212275981903076, "logps": -85.22294616699219, "loss": 43.2553, "objective": 43.40156555175781, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5083333253860474, "ranking_simple": 0.6000000238418579, "regularize": 0.1083984375, "step": 1025, "wo_beta": 15.021879196166992 }, { "dpo_loss": 0.5516722798347473, "epoch": 2.9192253188474258, "grad_norm": 16291.39704850031, "learning_rate": 9.235101625932884e-10, "logits": -1.5682528018951416, "logps": -85.36617279052734, "loss": 43.7786, "objective": 39.36371612548828, "ranking_idealized": 0.5416666865348816, "ranking_idealized_expo": 0.4791666567325592, "ranking_simple": 0.5375000238418579, "regularize": 0.0955829992890358, "step": 1030, "wo_beta": 16.441747665405273 }, { "dpo_loss": 0.5399314761161804, "epoch": 2.9333963155408598, "grad_norm": 16748.657990232314, "learning_rate": 6.025964801714411e-10, "logits": -1.5260014533996582, "logps": -86.03439331054688, "loss": 41.3554, "objective": 38.64834976196289, "ranking_idealized": 0.625, "ranking_idealized_expo": 0.5, "ranking_simple": 0.5916666388511658, "regularize": 0.0944010317325592, "step": 1035, "wo_beta": 14.993240356445312 }, { "dpo_loss": 0.5549695491790771, "epoch": 2.947567312234294, "grad_norm": 15029.578861688902, "learning_rate": 3.498655576543441e-10, "logits": -1.5111292600631714, "logps": -85.76802062988281, "loss": 38.8328, "objective": 38.22753143310547, "ranking_idealized": 0.5708333253860474, "ranking_idealized_expo": 0.4833333194255829, "ranking_simple": 0.550000011920929, "regularize": 0.095832958817482, "step": 1040, "wo_beta": 16.434364318847656 }, { "dpo_loss": 0.5555641651153564, "epoch": 2.961738308927728, "grad_norm": 17992.01977336534, "learning_rate": 1.6538648915270793e-10, "logits": -1.481310248374939, "logps": -87.40520477294922, "loss": 38.5959, "objective": 39.465171813964844, "ranking_idealized": 0.6208333373069763, "ranking_idealized_expo": 0.5666666626930237, "ranking_simple": 0.6166666746139526, "regularize": 0.10020165145397186, "step": 1045, "wo_beta": 18.04179573059082 }, { "dpo_loss": 0.5507573485374451, "epoch": 2.975909305621162, "grad_norm": 17353.770623615765, "learning_rate": 4.920970940180957e-11, "logits": -1.522560715675354, "logps": -83.40792083740234, "loss": 47.7054, "objective": 48.42831802368164, "ranking_idealized": 0.6541666388511658, "ranking_idealized_expo": 0.5541666746139526, "ranking_simple": 0.6166666746139526, "regularize": 0.121465764939785, "step": 1050, "wo_beta": 15.164950370788574 }, { "epoch": 2.975909305621162, "eval_dpo_loss": 0.6784854531288147, "eval_logits": -1.4974991083145142, "eval_logps": -92.26132202148438, "eval_loss": 179.26217651367188, "eval_objective": 175.9752197265625, "eval_ranking_idealized": 0.6024844646453857, "eval_ranking_idealized_expo": 0.5232919454574585, "eval_ranking_simple": 0.5279502868652344, "eval_regularize": 0.39915931224823, "eval_runtime": 370.7869, "eval_samples_per_second": 15.615, "eval_steps_per_second": 1.303, "eval_wo_beta": 16.585628509521484, "step": 1050 }, { "dpo_loss": 0.5567707419395447, "epoch": 2.9900803023145963, "grad_norm": 17458.927279367348, "learning_rate": 1.3669799732163311e-12, "logits": -1.4982311725616455, "logps": -84.29004669189453, "loss": 47.3347, "objective": 49.85600662231445, "ranking_idealized": 0.6083333492279053, "ranking_idealized_expo": 0.550000011920929, "ranking_simple": 0.6000000238418579, "regularize": 0.12724058330059052, "step": 1055, "wo_beta": 15.180956840515137 }, { "epoch": 2.992914501653283, "step": 1056, "total_flos": 0.0, "train_loss": 87.93023242011215, "train_runtime": 38512.4809, "train_samples_per_second": 3.957, "train_steps_per_second": 0.027 } ], "logging_steps": 5, "max_steps": 1056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }