|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.992914501653283, |
|
"eval_steps": 50, |
|
"global_step": 1056, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 17675.58033930074, |
|
"learning_rate": 4.716981132075472e-09, |
|
"logits": -1.2867579460144043, |
|
"logps": -84.34933471679688, |
|
"loss": 169.5214, |
|
"objective": 153.4677734375, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3618059456348419, |
|
"step": 1, |
|
"wo_beta": 14.83154582977295 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930959224700928, |
|
"epoch": 0.014170996693434105, |
|
"grad_norm": 16812.231100839916, |
|
"learning_rate": 2.3584905660377358e-08, |
|
"logits": -1.4290882349014282, |
|
"logps": -83.8636474609375, |
|
"loss": 181.7078, |
|
"objective": 168.5659942626953, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4895833432674408, |
|
"ranking_simple": 0.4895833432674408, |
|
"regularize": 0.40367603302001953, |
|
"step": 5, |
|
"wo_beta": 16.679981231689453 |
|
}, |
|
{ |
|
"dpo_loss": 0.6930798292160034, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 18597.19901899509, |
|
"learning_rate": 4.7169811320754715e-08, |
|
"logits": -1.4008352756500244, |
|
"logps": -84.84938049316406, |
|
"loss": 177.1073, |
|
"objective": 170.35797119140625, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.4039422273635864, |
|
"step": 10, |
|
"wo_beta": 15.222180366516113 |
|
}, |
|
{ |
|
"dpo_loss": 0.6921038031578064, |
|
"epoch": 0.042512990080302314, |
|
"grad_norm": 17855.275799007577, |
|
"learning_rate": 7.075471698113207e-08, |
|
"logits": -1.538023829460144, |
|
"logps": -84.5517578125, |
|
"loss": 178.9814, |
|
"objective": 187.4513702392578, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.4422657787799835, |
|
"step": 15, |
|
"wo_beta": 15.718367576599121 |
|
}, |
|
{ |
|
"dpo_loss": 0.6917796730995178, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 17564.60110673315, |
|
"learning_rate": 9.433962264150943e-08, |
|
"logits": -1.3617039918899536, |
|
"logps": -83.66792297363281, |
|
"loss": 185.8199, |
|
"objective": 204.0640411376953, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.44205835461616516, |
|
"step": 20, |
|
"wo_beta": 16.52640151977539 |
|
}, |
|
{ |
|
"dpo_loss": 0.6927011013031006, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 16991.775752313566, |
|
"learning_rate": 1.1792452830188679e-07, |
|
"logits": -1.3692513704299927, |
|
"logps": -83.765869140625, |
|
"loss": 182.1115, |
|
"objective": 173.06422424316406, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.40760377049446106, |
|
"step": 25, |
|
"wo_beta": 15.608158111572266 |
|
}, |
|
{ |
|
"dpo_loss": 0.6904457211494446, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 14856.204222337537, |
|
"learning_rate": 1.4150943396226414e-07, |
|
"logits": -1.4308700561523438, |
|
"logps": -83.563232421875, |
|
"loss": 181.7541, |
|
"objective": 176.98880004882812, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.43005380034446716, |
|
"step": 30, |
|
"wo_beta": 17.01230812072754 |
|
}, |
|
{ |
|
"dpo_loss": 0.6906622648239136, |
|
"epoch": 0.09919697685403873, |
|
"grad_norm": 16081.157641472842, |
|
"learning_rate": 1.650943396226415e-07, |
|
"logits": -1.4087789058685303, |
|
"logps": -82.7640151977539, |
|
"loss": 184.1344, |
|
"objective": 172.8912811279297, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.37934428453445435, |
|
"step": 35, |
|
"wo_beta": 16.152484893798828 |
|
}, |
|
{ |
|
"dpo_loss": 0.6896480917930603, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 17056.964984105944, |
|
"learning_rate": 1.8867924528301886e-07, |
|
"logits": -1.4006307125091553, |
|
"logps": -83.35142517089844, |
|
"loss": 188.1977, |
|
"objective": 182.53704833984375, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.40673011541366577, |
|
"step": 40, |
|
"wo_beta": 14.254140853881836 |
|
}, |
|
{ |
|
"dpo_loss": 0.6860460638999939, |
|
"epoch": 0.12753897024090693, |
|
"grad_norm": 15541.887109298903, |
|
"learning_rate": 2.1226415094339622e-07, |
|
"logits": -1.4170690774917603, |
|
"logps": -83.82962799072266, |
|
"loss": 172.0023, |
|
"objective": 184.33473205566406, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.41498348116874695, |
|
"step": 45, |
|
"wo_beta": 14.2799711227417 |
|
}, |
|
{ |
|
"dpo_loss": 0.6840464472770691, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 16674.096437377164, |
|
"learning_rate": 2.3584905660377358e-07, |
|
"logits": -1.4327392578125, |
|
"logps": -84.8567123413086, |
|
"loss": 182.5182, |
|
"objective": 187.45941162109375, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.43751442432403564, |
|
"step": 50, |
|
"wo_beta": 15.616755485534668 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.689544677734375, |
|
"eval_logits": -1.4199916124343872, |
|
"eval_logps": -90.85165405273438, |
|
"eval_loss": 182.50025939941406, |
|
"eval_objective": 180.4892578125, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5248447060585022, |
|
"eval_regularize": 0.4092595875263214, |
|
"eval_runtime": 369.2017, |
|
"eval_samples_per_second": 15.682, |
|
"eval_steps_per_second": 1.308, |
|
"eval_wo_beta": 16.310007095336914, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6828119158744812, |
|
"epoch": 0.15588096362777515, |
|
"grad_norm": 17241.419986006367, |
|
"learning_rate": 2.5943396226415094e-07, |
|
"logits": -1.3938590288162231, |
|
"logps": -84.56362915039062, |
|
"loss": 171.7727, |
|
"objective": 174.47201538085938, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.3751158118247986, |
|
"step": 55, |
|
"wo_beta": 15.145721435546875 |
|
}, |
|
{ |
|
"dpo_loss": 0.6828226447105408, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 16193.112384702756, |
|
"learning_rate": 2.830188679245283e-07, |
|
"logits": -1.325377345085144, |
|
"logps": -85.11466217041016, |
|
"loss": 175.0018, |
|
"objective": 174.82723999023438, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41120079159736633, |
|
"step": 60, |
|
"wo_beta": 14.497312545776367 |
|
}, |
|
{ |
|
"dpo_loss": 0.673675537109375, |
|
"epoch": 0.18422295701464336, |
|
"grad_norm": 16474.473591772632, |
|
"learning_rate": 3.066037735849056e-07, |
|
"logits": -1.4237332344055176, |
|
"logps": -82.10260772705078, |
|
"loss": 174.1747, |
|
"objective": 163.51272583007812, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.3746616542339325, |
|
"step": 65, |
|
"wo_beta": 15.076056480407715 |
|
}, |
|
{ |
|
"dpo_loss": 0.6760156750679016, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 15655.300421670066, |
|
"learning_rate": 3.30188679245283e-07, |
|
"logits": -1.4630695581436157, |
|
"logps": -84.45524597167969, |
|
"loss": 175.2515, |
|
"objective": 174.8110809326172, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.40165895223617554, |
|
"step": 70, |
|
"wo_beta": 15.362497329711914 |
|
}, |
|
{ |
|
"dpo_loss": 0.6764008402824402, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 18669.073461411434, |
|
"learning_rate": 3.5377358490566033e-07, |
|
"logits": -1.3853403329849243, |
|
"logps": -84.13139343261719, |
|
"loss": 176.1206, |
|
"objective": 166.9723663330078, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.38751351833343506, |
|
"step": 75, |
|
"wo_beta": 15.094878196716309 |
|
}, |
|
{ |
|
"dpo_loss": 0.6740989089012146, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 19007.32032313182, |
|
"learning_rate": 3.773584905660377e-07, |
|
"logits": -1.481835126876831, |
|
"logps": -83.50402069091797, |
|
"loss": 170.5001, |
|
"objective": 178.72813415527344, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.42763322591781616, |
|
"step": 80, |
|
"wo_beta": 16.335308074951172 |
|
}, |
|
{ |
|
"dpo_loss": 0.6696261167526245, |
|
"epoch": 0.2409069437883798, |
|
"grad_norm": 15826.33154406391, |
|
"learning_rate": 4.009433962264151e-07, |
|
"logits": -1.4629038572311401, |
|
"logps": -83.72789001464844, |
|
"loss": 173.4106, |
|
"objective": 184.8107452392578, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.41807958483695984, |
|
"step": 85, |
|
"wo_beta": 15.131178855895996 |
|
}, |
|
{ |
|
"dpo_loss": 0.6652686595916748, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 16251.074571263394, |
|
"learning_rate": 4.2452830188679244e-07, |
|
"logits": -1.562590479850769, |
|
"logps": -84.50687408447266, |
|
"loss": 170.0469, |
|
"objective": 178.0680694580078, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.40833932161331177, |
|
"step": 90, |
|
"wo_beta": 15.540740966796875 |
|
}, |
|
{ |
|
"dpo_loss": 0.6606998443603516, |
|
"epoch": 0.269248937175248, |
|
"grad_norm": 17104.36417405731, |
|
"learning_rate": 4.481132075471698e-07, |
|
"logits": -1.4817092418670654, |
|
"logps": -84.26000213623047, |
|
"loss": 172.5874, |
|
"objective": 179.69664001464844, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.412168025970459, |
|
"step": 95, |
|
"wo_beta": 16.397871017456055 |
|
}, |
|
{ |
|
"dpo_loss": 0.6613048315048218, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 16520.717626446203, |
|
"learning_rate": 4.7169811320754717e-07, |
|
"logits": -1.3626132011413574, |
|
"logps": -83.24072265625, |
|
"loss": 159.305, |
|
"objective": 164.95997619628906, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.37101998925209045, |
|
"step": 100, |
|
"wo_beta": 15.463290214538574 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6859607100486755, |
|
"eval_logits": -1.4621983766555786, |
|
"eval_logps": -91.35309600830078, |
|
"eval_loss": 182.15220642089844, |
|
"eval_objective": 180.52191162109375, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.41025590896606445, |
|
"eval_runtime": 368.4707, |
|
"eval_samples_per_second": 15.714, |
|
"eval_steps_per_second": 1.311, |
|
"eval_wo_beta": 16.38188362121582, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6732772588729858, |
|
"epoch": 0.2975909305621162, |
|
"grad_norm": 20087.528218167263, |
|
"learning_rate": 4.952830188679246e-07, |
|
"logits": -1.5680618286132812, |
|
"logps": -86.15119934082031, |
|
"loss": 163.5086, |
|
"objective": 165.89260864257812, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.38661062717437744, |
|
"step": 105, |
|
"wo_beta": 14.799639701843262 |
|
}, |
|
{ |
|
"dpo_loss": 0.6610164046287537, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 16022.782640878671, |
|
"learning_rate": 4.999781286194085e-07, |
|
"logits": -1.470965027809143, |
|
"logps": -85.03189849853516, |
|
"loss": 162.0049, |
|
"objective": 163.46115112304688, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.38891494274139404, |
|
"step": 110, |
|
"wo_beta": 15.151239395141602 |
|
}, |
|
{ |
|
"dpo_loss": 0.6605216860771179, |
|
"epoch": 0.32593292394898443, |
|
"grad_norm": 17332.290004559494, |
|
"learning_rate": 4.998892826944417e-07, |
|
"logits": -1.4446924924850464, |
|
"logps": -83.6183090209961, |
|
"loss": 151.5536, |
|
"objective": 154.1279296875, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.37217938899993896, |
|
"step": 115, |
|
"wo_beta": 16.30828094482422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6518290042877197, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 15444.050796566442, |
|
"learning_rate": 4.997321195347154e-07, |
|
"logits": -1.4417078495025635, |
|
"logps": -83.37753295898438, |
|
"loss": 159.0179, |
|
"objective": 166.70437622070312, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.36605674028396606, |
|
"step": 120, |
|
"wo_beta": 15.73963451385498 |
|
}, |
|
{ |
|
"dpo_loss": 0.655017614364624, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 16163.2780124362, |
|
"learning_rate": 4.995066821070679e-07, |
|
"logits": -1.479369044303894, |
|
"logps": -86.81266021728516, |
|
"loss": 152.8657, |
|
"objective": 143.82176208496094, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.3394821584224701, |
|
"step": 125, |
|
"wo_beta": 16.155017852783203 |
|
}, |
|
{ |
|
"dpo_loss": 0.6490565538406372, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 21066.462923637613, |
|
"learning_rate": 4.99213032043841e-07, |
|
"logits": -1.4559980630874634, |
|
"logps": -84.79336547851562, |
|
"loss": 159.2873, |
|
"objective": 164.24014282226562, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.36314311623573303, |
|
"step": 130, |
|
"wo_beta": 16.631132125854492 |
|
}, |
|
{ |
|
"dpo_loss": 0.6596164703369141, |
|
"epoch": 0.3826169107227208, |
|
"grad_norm": 25406.331880366477, |
|
"learning_rate": 4.988512496260301e-07, |
|
"logits": -1.4966251850128174, |
|
"logps": -85.71400451660156, |
|
"loss": 162.2932, |
|
"objective": 171.63385009765625, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.3589702546596527, |
|
"step": 135, |
|
"wo_beta": 14.497623443603516 |
|
}, |
|
{ |
|
"dpo_loss": 0.6495281457901001, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 18698.7190702899, |
|
"learning_rate": 4.984214337613357e-07, |
|
"logits": -1.4679287672042847, |
|
"logps": -85.32872772216797, |
|
"loss": 145.3416, |
|
"objective": 162.12405395507812, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.37301716208457947, |
|
"step": 140, |
|
"wo_beta": 14.43948745727539 |
|
}, |
|
{ |
|
"dpo_loss": 0.6436702013015747, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 17254.43790008681, |
|
"learning_rate": 4.979237019571234e-07, |
|
"logits": -1.4821332693099976, |
|
"logps": -85.37076568603516, |
|
"loss": 150.1815, |
|
"objective": 147.49313354492188, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.35633403062820435, |
|
"step": 145, |
|
"wo_beta": 14.047316551208496 |
|
}, |
|
{ |
|
"dpo_loss": 0.6469600796699524, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 15151.951735697437, |
|
"learning_rate": 4.973581902882989e-07, |
|
"logits": -1.507290005683899, |
|
"logps": -84.8727035522461, |
|
"loss": 150.2379, |
|
"objective": 155.10818481445312, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.35707762837409973, |
|
"step": 150, |
|
"wo_beta": 17.33567237854004 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6805834770202637, |
|
"eval_logits": -1.4576332569122314, |
|
"eval_logps": -90.24694061279297, |
|
"eval_loss": 180.0574951171875, |
|
"eval_objective": 177.15777587890625, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5331262946128845, |
|
"eval_regularize": 0.40097880363464355, |
|
"eval_runtime": 369.0413, |
|
"eval_samples_per_second": 15.689, |
|
"eval_steps_per_second": 1.309, |
|
"eval_wo_beta": 16.610729217529297, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.645554780960083, |
|
"epoch": 0.43930089749645723, |
|
"grad_norm": 16201.615637764453, |
|
"learning_rate": 4.967250533601059e-07, |
|
"logits": -1.5539363622665405, |
|
"logps": -83.4765396118164, |
|
"loss": 146.8879, |
|
"objective": 153.79454040527344, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.49166667461395264, |
|
"regularize": 0.3449983596801758, |
|
"step": 155, |
|
"wo_beta": 15.733673095703125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6385053396224976, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 15493.137210302475, |
|
"learning_rate": 4.960244642658585e-07, |
|
"logits": -1.4331082105636597, |
|
"logps": -84.22053527832031, |
|
"loss": 152.5499, |
|
"objective": 151.0172882080078, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.34484514594078064, |
|
"step": 160, |
|
"wo_beta": 14.795166015625 |
|
}, |
|
{ |
|
"dpo_loss": 0.6392009854316711, |
|
"epoch": 0.46764289088332545, |
|
"grad_norm": 17120.620589579255, |
|
"learning_rate": 4.952566145396196e-07, |
|
"logits": -1.5298134088516235, |
|
"logps": -85.76438903808594, |
|
"loss": 144.1923, |
|
"objective": 138.38796997070312, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.32356587052345276, |
|
"step": 165, |
|
"wo_beta": 15.664267539978027 |
|
}, |
|
{ |
|
"dpo_loss": 0.6427010893821716, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 16261.789384415662, |
|
"learning_rate": 4.944217141038378e-07, |
|
"logits": -1.5661406517028809, |
|
"logps": -85.18688201904297, |
|
"loss": 146.9829, |
|
"objective": 135.14251708984375, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.32022854685783386, |
|
"step": 170, |
|
"wo_beta": 16.11832046508789 |
|
}, |
|
{ |
|
"dpo_loss": 0.6393853425979614, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 17356.348275057364, |
|
"learning_rate": 4.935199912119557e-07, |
|
"logits": -1.4016886949539185, |
|
"logps": -86.42796325683594, |
|
"loss": 138.7378, |
|
"objective": 130.81809997558594, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.3065473735332489, |
|
"step": 175, |
|
"wo_beta": 18.01983070373535 |
|
}, |
|
{ |
|
"dpo_loss": 0.6324561238288879, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 16878.815105316982, |
|
"learning_rate": 4.925516923860082e-07, |
|
"logits": -1.387779951095581, |
|
"logps": -85.71736907958984, |
|
"loss": 143.5962, |
|
"objective": 155.15878295898438, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.34209319949150085, |
|
"step": 180, |
|
"wo_beta": 15.11577033996582 |
|
}, |
|
{ |
|
"dpo_loss": 0.6253044605255127, |
|
"epoch": 0.5243268776570619, |
|
"grad_norm": 18146.789309631047, |
|
"learning_rate": 4.91517082349226e-07, |
|
"logits": -1.4047019481658936, |
|
"logps": -84.99198913574219, |
|
"loss": 137.7836, |
|
"objective": 136.2899627685547, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.32172784209251404, |
|
"step": 185, |
|
"wo_beta": 14.294845581054688 |
|
}, |
|
{ |
|
"dpo_loss": 0.618080735206604, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 16414.68768978514, |
|
"learning_rate": 4.904164439536626e-07, |
|
"logits": -1.438673496246338, |
|
"logps": -84.07417297363281, |
|
"loss": 134.8449, |
|
"objective": 134.37405395507812, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.30226340889930725, |
|
"step": 190, |
|
"wo_beta": 17.166841506958008 |
|
}, |
|
{ |
|
"dpo_loss": 0.6151688694953918, |
|
"epoch": 0.5526688710439301, |
|
"grad_norm": 17479.85306665603, |
|
"learning_rate": 4.892500781028655e-07, |
|
"logits": -1.4530678987503052, |
|
"logps": -84.28058624267578, |
|
"loss": 143.4298, |
|
"objective": 148.5701141357422, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.32925572991371155, |
|
"step": 195, |
|
"wo_beta": 15.276497840881348 |
|
}, |
|
{ |
|
"dpo_loss": 0.6147686243057251, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 17426.518013700184, |
|
"learning_rate": 4.880183036696122e-07, |
|
"logits": -1.461132287979126, |
|
"logps": -84.65553283691406, |
|
"loss": 135.925, |
|
"objective": 143.74124145507812, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.3410433530807495, |
|
"step": 200, |
|
"wo_beta": 16.30559539794922 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.6795096397399902, |
|
"eval_logits": -1.445342779159546, |
|
"eval_logps": -91.12489318847656, |
|
"eval_loss": 179.97398376464844, |
|
"eval_objective": 177.04129028320312, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5305383205413818, |
|
"eval_regularize": 0.40064504742622375, |
|
"eval_runtime": 371.8339, |
|
"eval_samples_per_second": 15.571, |
|
"eval_steps_per_second": 1.299, |
|
"eval_wo_beta": 16.26874351501465, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.6258116960525513, |
|
"epoch": 0.5810108644307983, |
|
"grad_norm": 16576.414881735825, |
|
"learning_rate": 4.867214574087337e-07, |
|
"logits": -1.3335182666778564, |
|
"logps": -85.45887756347656, |
|
"loss": 131.7019, |
|
"objective": 124.74952697753906, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.30992764234542847, |
|
"step": 205, |
|
"wo_beta": 16.508281707763672 |
|
}, |
|
{ |
|
"dpo_loss": 0.6121302247047424, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 19803.851938298896, |
|
"learning_rate": 4.853598938650486e-07, |
|
"logits": -1.3813374042510986, |
|
"logps": -85.58969116210938, |
|
"loss": 142.8564, |
|
"objective": 147.2575225830078, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.34093615412712097, |
|
"step": 210, |
|
"wo_beta": 15.581511497497559 |
|
}, |
|
{ |
|
"dpo_loss": 0.6104889512062073, |
|
"epoch": 0.6093528578176665, |
|
"grad_norm": 16635.20929245541, |
|
"learning_rate": 4.839339852764349e-07, |
|
"logits": -1.4747200012207031, |
|
"logps": -84.081298828125, |
|
"loss": 132.2213, |
|
"objective": 143.8260040283203, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.3345707654953003, |
|
"step": 215, |
|
"wo_beta": 16.507150650024414 |
|
}, |
|
{ |
|
"dpo_loss": 0.6056095957756042, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 17892.305863583017, |
|
"learning_rate": 4.824441214720628e-07, |
|
"logits": -1.4685624837875366, |
|
"logps": -84.3477783203125, |
|
"loss": 136.6061, |
|
"objective": 134.5189971923828, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3130161464214325, |
|
"step": 220, |
|
"wo_beta": 16.86256217956543 |
|
}, |
|
{ |
|
"dpo_loss": 0.6179187297821045, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 15080.021415971216, |
|
"learning_rate": 4.808907097658205e-07, |
|
"logits": -1.5259219408035278, |
|
"logps": -85.83920288085938, |
|
"loss": 132.0525, |
|
"objective": 135.1264190673828, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.310780793428421, |
|
"step": 225, |
|
"wo_beta": 15.415994644165039 |
|
}, |
|
{ |
|
"dpo_loss": 0.613567590713501, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 15927.895183583649, |
|
"learning_rate": 4.792741748449574e-07, |
|
"logits": -1.4311482906341553, |
|
"logps": -85.6706314086914, |
|
"loss": 121.739, |
|
"objective": 121.26396179199219, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.29853278398513794, |
|
"step": 230, |
|
"wo_beta": 16.95390510559082 |
|
}, |
|
{ |
|
"dpo_loss": 0.6123810410499573, |
|
"epoch": 0.6660368445914029, |
|
"grad_norm": 18377.873397463725, |
|
"learning_rate": 4.775949586539803e-07, |
|
"logits": -1.3708454370498657, |
|
"logps": -86.44990539550781, |
|
"loss": 121.6467, |
|
"objective": 109.16976165771484, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.2606847584247589, |
|
"step": 235, |
|
"wo_beta": 16.56985092163086 |
|
}, |
|
{ |
|
"dpo_loss": 0.6006901264190674, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 20176.56902743112, |
|
"learning_rate": 4.758535202738287e-07, |
|
"logits": -1.5398815870285034, |
|
"logps": -86.04439544677734, |
|
"loss": 135.6713, |
|
"objective": 137.5254364013672, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.29497501254081726, |
|
"step": 240, |
|
"wo_beta": 16.538175582885742 |
|
}, |
|
{ |
|
"dpo_loss": 0.6231993436813354, |
|
"epoch": 0.6943788379782712, |
|
"grad_norm": 16269.619431330953, |
|
"learning_rate": 4.7405033579636755e-07, |
|
"logits": -1.5562618970870972, |
|
"logps": -86.00993347167969, |
|
"loss": 122.2342, |
|
"objective": 110.85368347167969, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.2635067105293274, |
|
"step": 245, |
|
"wo_beta": 16.003578186035156 |
|
}, |
|
{ |
|
"dpo_loss": 0.608769953250885, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 16883.673846830992, |
|
"learning_rate": 4.721858981942284e-07, |
|
"logits": -1.3197777271270752, |
|
"logps": -84.4737319946289, |
|
"loss": 130.7065, |
|
"objective": 131.6678924560547, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.29679057002067566, |
|
"step": 250, |
|
"wo_beta": 15.271538734436035 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.680046558380127, |
|
"eval_logits": -1.5061429738998413, |
|
"eval_logps": -91.61782836914062, |
|
"eval_loss": 181.50924682617188, |
|
"eval_objective": 178.27838134765625, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5305383205413818, |
|
"eval_regularize": 0.404918372631073, |
|
"eval_runtime": 368.2121, |
|
"eval_samples_per_second": 15.725, |
|
"eval_steps_per_second": 1.312, |
|
"eval_wo_beta": 16.640703201293945, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.6077960729598999, |
|
"epoch": 0.7227208313651393, |
|
"grad_norm": 16400.742832285967, |
|
"learning_rate": 4.702607171860353e-07, |
|
"logits": -1.582943320274353, |
|
"logps": -83.8155746459961, |
|
"loss": 128.7728, |
|
"objective": 116.49504852294922, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.42916667461395264, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.28917694091796875, |
|
"step": 255, |
|
"wo_beta": 15.234207153320312 |
|
}, |
|
{ |
|
"dpo_loss": 0.6078327298164368, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 19074.785460432806, |
|
"learning_rate": 4.6827531909705327e-07, |
|
"logits": -1.5513815879821777, |
|
"logps": -85.73532104492188, |
|
"loss": 120.8491, |
|
"objective": 135.26841735839844, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.3036416172981262, |
|
"step": 260, |
|
"wo_beta": 15.789148330688477 |
|
}, |
|
{ |
|
"dpo_loss": 0.6069940328598022, |
|
"epoch": 0.7510628247520076, |
|
"grad_norm": 14839.46399025929, |
|
"learning_rate": 4.662302467152955e-07, |
|
"logits": -1.4743403196334839, |
|
"logps": -85.0496597290039, |
|
"loss": 115.1093, |
|
"objective": 120.47551727294922, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.29132476449012756, |
|
"step": 265, |
|
"wo_beta": 16.472087860107422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6180242300033569, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 17448.69545980003, |
|
"learning_rate": 4.6412605914313143e-07, |
|
"logits": -1.5716725587844849, |
|
"logps": -85.8649673461914, |
|
"loss": 116.6119, |
|
"objective": 116.45634460449219, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.4791666567325592, |
|
"regularize": 0.25854194164276123, |
|
"step": 270, |
|
"wo_beta": 14.43771743774414 |
|
}, |
|
{ |
|
"dpo_loss": 0.6129618287086487, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 16560.36826913483, |
|
"learning_rate": 4.619633316444329e-07, |
|
"logits": -1.4034606218338013, |
|
"logps": -84.92838287353516, |
|
"loss": 111.1039, |
|
"objective": 110.11347961425781, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.2595362663269043, |
|
"step": 275, |
|
"wo_beta": 15.505229949951172 |
|
}, |
|
{ |
|
"dpo_loss": 0.6037231087684631, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 18592.99967397487, |
|
"learning_rate": 4.597426554873036e-07, |
|
"logits": -1.5048367977142334, |
|
"logps": -85.48095703125, |
|
"loss": 124.5191, |
|
"objective": 130.4561767578125, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.27942749857902527, |
|
"step": 280, |
|
"wo_beta": 15.601365089416504 |
|
}, |
|
{ |
|
"dpo_loss": 0.6074939370155334, |
|
"epoch": 0.807746811525744, |
|
"grad_norm": 16296.64397951243, |
|
"learning_rate": 4.574646377824315e-07, |
|
"logits": -1.4947975873947144, |
|
"logps": -87.23796844482422, |
|
"loss": 116.6867, |
|
"objective": 114.05181121826172, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.2666790187358856, |
|
"step": 285, |
|
"wo_beta": 17.003517150878906 |
|
}, |
|
{ |
|
"dpo_loss": 0.5908948183059692, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 16844.100126093494, |
|
"learning_rate": 4.551299013171111e-07, |
|
"logits": -1.500679850578308, |
|
"logps": -86.47892761230469, |
|
"loss": 113.1195, |
|
"objective": 110.57820892333984, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.25748953223228455, |
|
"step": 290, |
|
"wo_beta": 16.26107406616211 |
|
}, |
|
{ |
|
"dpo_loss": 0.602837860584259, |
|
"epoch": 0.8360888049126122, |
|
"grad_norm": 15918.83667114978, |
|
"learning_rate": 4.5273908438498e-07, |
|
"logits": -1.488081693649292, |
|
"logps": -86.3355484008789, |
|
"loss": 112.7178, |
|
"objective": 110.10317993164062, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.2576940059661865, |
|
"step": 295, |
|
"wo_beta": 16.481365203857422 |
|
}, |
|
{ |
|
"dpo_loss": 0.6130139231681824, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 16024.42965961574, |
|
"learning_rate": 4.502928406115152e-07, |
|
"logits": -1.4704513549804688, |
|
"logps": -84.77509307861328, |
|
"loss": 109.74, |
|
"objective": 110.3909683227539, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.25806304812431335, |
|
"step": 300, |
|
"wo_beta": 14.735386848449707 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6815229654312134, |
|
"eval_logits": -1.476037859916687, |
|
"eval_logps": -92.42357635498047, |
|
"eval_loss": 180.4923553466797, |
|
"eval_objective": 178.13650512695312, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5305383205413818, |
|
"eval_regularize": 0.40466198325157166, |
|
"eval_runtime": 369.1363, |
|
"eval_samples_per_second": 15.685, |
|
"eval_steps_per_second": 1.308, |
|
"eval_wo_beta": 16.49808120727539, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.6006699204444885, |
|
"epoch": 0.8644307982994804, |
|
"grad_norm": 16503.480891595664, |
|
"learning_rate": 4.4779183877533877e-07, |
|
"logits": -1.4632763862609863, |
|
"logps": -85.11184692382812, |
|
"loss": 110.0493, |
|
"objective": 114.68462371826172, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.25510621070861816, |
|
"step": 305, |
|
"wo_beta": 15.895527839660645 |
|
}, |
|
{ |
|
"dpo_loss": 0.6042004227638245, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 15793.814439937723, |
|
"learning_rate": 4.4523676262538045e-07, |
|
"logits": -1.47891104221344, |
|
"logps": -84.36697387695312, |
|
"loss": 112.7694, |
|
"objective": 122.6783676147461, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.2812657058238983, |
|
"step": 310, |
|
"wo_beta": 15.347722053527832 |
|
}, |
|
{ |
|
"dpo_loss": 0.591303825378418, |
|
"epoch": 0.8927727916863486, |
|
"grad_norm": 17495.093435377945, |
|
"learning_rate": 4.426283106939473e-07, |
|
"logits": -1.4973819255828857, |
|
"logps": -84.14984893798828, |
|
"loss": 105.8584, |
|
"objective": 102.11656188964844, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.23703667521476746, |
|
"step": 315, |
|
"wo_beta": 15.412622451782227 |
|
}, |
|
{ |
|
"dpo_loss": 0.5901548862457275, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 16776.49496984533, |
|
"learning_rate": 4.3996719610575215e-07, |
|
"logits": -1.4549332857131958, |
|
"logps": -86.48863983154297, |
|
"loss": 101.9659, |
|
"objective": 103.75129699707031, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.2400914579629898, |
|
"step": 320, |
|
"wo_beta": 15.67490005493164 |
|
}, |
|
{ |
|
"dpo_loss": 0.5979679226875305, |
|
"epoch": 0.9211147850732169, |
|
"grad_norm": 16212.553406478031, |
|
"learning_rate": 4.372541463829523e-07, |
|
"logits": -1.543658971786499, |
|
"logps": -87.07477569580078, |
|
"loss": 110.2578, |
|
"objective": 94.58499908447266, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.23497775197029114, |
|
"step": 325, |
|
"wo_beta": 15.544549942016602 |
|
}, |
|
{ |
|
"dpo_loss": 0.5859458446502686, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 17177.10951815794, |
|
"learning_rate": 4.344899032462524e-07, |
|
"logits": -1.3802608251571655, |
|
"logps": -86.10081481933594, |
|
"loss": 113.3699, |
|
"objective": 119.8874740600586, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.2620205283164978, |
|
"step": 330, |
|
"wo_beta": 16.999446868896484 |
|
}, |
|
{ |
|
"dpo_loss": 0.5934227108955383, |
|
"epoch": 0.949456778460085, |
|
"grad_norm": 17661.288172623317, |
|
"learning_rate": 4.316752224121252e-07, |
|
"logits": -1.4096896648406982, |
|
"logps": -85.97354125976562, |
|
"loss": 112.801, |
|
"objective": 102.87708282470703, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.226911723613739, |
|
"step": 335, |
|
"wo_beta": 15.912822723388672 |
|
}, |
|
{ |
|
"dpo_loss": 0.5984110236167908, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 16330.575056246405, |
|
"learning_rate": 4.2881087338620634e-07, |
|
"logits": -1.4624823331832886, |
|
"logps": -85.42594909667969, |
|
"loss": 112.5159, |
|
"objective": 105.5082015991211, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.24777108430862427, |
|
"step": 340, |
|
"wo_beta": 16.43703269958496 |
|
}, |
|
{ |
|
"dpo_loss": 0.5881261825561523, |
|
"epoch": 0.9777987718469532, |
|
"grad_norm": 16460.598983396474, |
|
"learning_rate": 4.258976392529192e-07, |
|
"logits": -1.5221667289733887, |
|
"logps": -84.57250213623047, |
|
"loss": 105.3708, |
|
"objective": 96.2402114868164, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5833333134651184, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.22673317790031433, |
|
"step": 345, |
|
"wo_beta": 15.497628211975098 |
|
}, |
|
{ |
|
"dpo_loss": 0.582562267780304, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 15979.907994389112, |
|
"learning_rate": 4.2293631646138735e-07, |
|
"logits": -1.4198105335235596, |
|
"logps": -87.27174377441406, |
|
"loss": 104.2663, |
|
"objective": 106.56012725830078, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.2592408061027527, |
|
"step": 350, |
|
"wo_beta": 15.726160049438477 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.6808353066444397, |
|
"eval_logits": -1.5066107511520386, |
|
"eval_logps": -92.80049133300781, |
|
"eval_loss": 182.25906372070312, |
|
"eval_objective": 178.8644256591797, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5289855003356934, |
|
"eval_regularize": 0.4057510197162628, |
|
"eval_runtime": 368.5888, |
|
"eval_samples_per_second": 15.709, |
|
"eval_steps_per_second": 1.31, |
|
"eval_wo_beta": 16.569448471069336, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5861319899559021, |
|
"epoch": 1.0061407652338215, |
|
"grad_norm": 16794.52051588047, |
|
"learning_rate": 4.1992771460769325e-07, |
|
"logits": -1.6238858699798584, |
|
"logps": -84.81636810302734, |
|
"loss": 101.4044, |
|
"objective": 99.22207641601562, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.2406620979309082, |
|
"step": 355, |
|
"wo_beta": 16.778457641601562 |
|
}, |
|
{ |
|
"dpo_loss": 0.5623802542686462, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 17512.309592114732, |
|
"learning_rate": 4.168726562135431e-07, |
|
"logits": -1.4817250967025757, |
|
"logps": -85.78034973144531, |
|
"loss": 89.2678, |
|
"objective": 87.2276840209961, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.20162057876586914, |
|
"step": 360, |
|
"wo_beta": 16.540082931518555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5711230635643005, |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 17722.39127835427, |
|
"learning_rate": 4.1377197650139734e-07, |
|
"logits": -1.5016947984695435, |
|
"logps": -86.203369140625, |
|
"loss": 94.5137, |
|
"objective": 95.95963287353516, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.21429090201854706, |
|
"step": 365, |
|
"wo_beta": 15.065950393676758 |
|
}, |
|
{ |
|
"dpo_loss": 0.5723836421966553, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 16746.74468565218, |
|
"learning_rate": 4.106265231661291e-07, |
|
"logits": -1.4276858568191528, |
|
"logps": -84.18301391601562, |
|
"loss": 87.9015, |
|
"objective": 87.47245788574219, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.20792974531650543, |
|
"step": 370, |
|
"wo_beta": 15.806612968444824 |
|
}, |
|
{ |
|
"dpo_loss": 0.575459897518158, |
|
"epoch": 1.0628247520075578, |
|
"grad_norm": 16711.49471758267, |
|
"learning_rate": 4.0743715614327314e-07, |
|
"logits": -1.4709128141403198, |
|
"logps": -84.51998901367188, |
|
"loss": 81.3317, |
|
"objective": 80.16383361816406, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.19415231049060822, |
|
"step": 375, |
|
"wo_beta": 16.286664962768555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5680096745491028, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 15938.178621987885, |
|
"learning_rate": 4.042047473739277e-07, |
|
"logits": -1.4488080739974976, |
|
"logps": -86.38304138183594, |
|
"loss": 88.4379, |
|
"objective": 90.77100372314453, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.21614128351211548, |
|
"step": 380, |
|
"wo_beta": 15.816045761108398 |
|
}, |
|
{ |
|
"dpo_loss": 0.5828992128372192, |
|
"epoch": 1.091166745394426, |
|
"grad_norm": 15573.908569824505, |
|
"learning_rate": 4.009301805663752e-07, |
|
"logits": -1.4298585653305054, |
|
"logps": -85.34860229492188, |
|
"loss": 90.2727, |
|
"objective": 100.38928985595703, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.21903865039348602, |
|
"step": 385, |
|
"wo_beta": 15.969101905822754 |
|
}, |
|
{ |
|
"dpo_loss": 0.5651105046272278, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 15903.36624651428, |
|
"learning_rate": 3.9761435095448424e-07, |
|
"logits": -1.386973261833191, |
|
"logps": -86.8327865600586, |
|
"loss": 85.9698, |
|
"objective": 81.47636413574219, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.1953142136335373, |
|
"step": 390, |
|
"wo_beta": 16.701154708862305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5567125082015991, |
|
"epoch": 1.1195087387812943, |
|
"grad_norm": 17073.21610205935, |
|
"learning_rate": 3.942581650529625e-07, |
|
"logits": -1.4661533832550049, |
|
"logps": -86.22716522216797, |
|
"loss": 78.9786, |
|
"objective": 86.99171447753906, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.19182702898979187, |
|
"step": 395, |
|
"wo_beta": 15.073732376098633 |
|
}, |
|
{ |
|
"dpo_loss": 0.5746569037437439, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 17100.977239850836, |
|
"learning_rate": 3.908625404095242e-07, |
|
"logits": -1.542074203491211, |
|
"logps": -84.60574340820312, |
|
"loss": 91.3585, |
|
"objective": 91.53578186035156, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.2116149365901947, |
|
"step": 400, |
|
"wo_beta": 15.816818237304688 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.6799505352973938, |
|
"eval_logits": -1.478875756263733, |
|
"eval_logps": -92.38536071777344, |
|
"eval_loss": 180.029541015625, |
|
"eval_objective": 177.71481323242188, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 0.402423620223999, |
|
"eval_runtime": 374.7281, |
|
"eval_samples_per_second": 15.451, |
|
"eval_steps_per_second": 1.289, |
|
"eval_wo_beta": 16.585174560546875, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5667446255683899, |
|
"epoch": 1.1478507321681626, |
|
"grad_norm": 17235.250314074532, |
|
"learning_rate": 3.874284053540415e-07, |
|
"logits": -1.506400227546692, |
|
"logps": -86.09246063232422, |
|
"loss": 87.7881, |
|
"objective": 99.79354095458984, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.21635954082012177, |
|
"step": 405, |
|
"wo_beta": 17.256864547729492 |
|
}, |
|
{ |
|
"dpo_loss": 0.5707606673240662, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 16725.889726383095, |
|
"learning_rate": 3.839566987447491e-07, |
|
"logits": -1.5043673515319824, |
|
"logps": -85.6620864868164, |
|
"loss": 88.1003, |
|
"objective": 87.76438903808594, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.19931498169898987, |
|
"step": 410, |
|
"wo_beta": 16.657556533813477 |
|
}, |
|
{ |
|
"dpo_loss": 0.5724092721939087, |
|
"epoch": 1.1761927255550306, |
|
"grad_norm": 14812.855448630145, |
|
"learning_rate": 3.804483697115693e-07, |
|
"logits": -1.420817255973816, |
|
"logps": -85.20552062988281, |
|
"loss": 80.2264, |
|
"objective": 89.5212173461914, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.21374522149562836, |
|
"step": 415, |
|
"wo_beta": 16.42493438720703 |
|
}, |
|
{ |
|
"dpo_loss": 0.5684979557991028, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 16758.534351302478, |
|
"learning_rate": 3.769043773966292e-07, |
|
"logits": -1.3999756574630737, |
|
"logps": -86.63607788085938, |
|
"loss": 86.6386, |
|
"objective": 74.90924835205078, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.17667636275291443, |
|
"step": 420, |
|
"wo_beta": 16.390174865722656 |
|
}, |
|
{ |
|
"dpo_loss": 0.5639453530311584, |
|
"epoch": 1.204534718941899, |
|
"grad_norm": 16337.276317485204, |
|
"learning_rate": 3.733256906920412e-07, |
|
"logits": -1.4687834978103638, |
|
"logps": -85.14289093017578, |
|
"loss": 88.4628, |
|
"objective": 91.70726013183594, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.21049639582633972, |
|
"step": 425, |
|
"wo_beta": 15.31130599975586 |
|
}, |
|
{ |
|
"dpo_loss": 0.5734551548957825, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 15248.100441462713, |
|
"learning_rate": 3.6971328797501735e-07, |
|
"logits": -1.448046088218689, |
|
"logps": -86.40514373779297, |
|
"loss": 77.2816, |
|
"objective": 79.60990142822266, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.17995795607566833, |
|
"step": 430, |
|
"wo_beta": 16.629831314086914 |
|
}, |
|
{ |
|
"dpo_loss": 0.5668199062347412, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 16306.828228688702, |
|
"learning_rate": 3.660681568403909e-07, |
|
"logits": -1.4072421789169312, |
|
"logps": -85.66299438476562, |
|
"loss": 87.0271, |
|
"objective": 85.58671569824219, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.19756914675235748, |
|
"step": 435, |
|
"wo_beta": 17.663021087646484 |
|
}, |
|
{ |
|
"dpo_loss": 0.5658931732177734, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 20345.22767033362, |
|
"learning_rate": 3.623912938306176e-07, |
|
"logits": -1.4035090208053589, |
|
"logps": -85.33922576904297, |
|
"loss": 79.1586, |
|
"objective": 74.25531768798828, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.17811766266822815, |
|
"step": 440, |
|
"wo_beta": 15.194890975952148 |
|
}, |
|
{ |
|
"dpo_loss": 0.5541914105415344, |
|
"epoch": 1.2612187057156352, |
|
"grad_norm": 17279.48326071237, |
|
"learning_rate": 3.5868370416333116e-07, |
|
"logits": -1.4238730669021606, |
|
"logps": -87.22127532958984, |
|
"loss": 85.5554, |
|
"objective": 88.88020324707031, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.21330490708351135, |
|
"step": 445, |
|
"wo_beta": 17.520837783813477 |
|
}, |
|
{ |
|
"dpo_loss": 0.5635745525360107, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 15953.8185394536, |
|
"learning_rate": 3.549464014565264e-07, |
|
"logits": -1.5866882801055908, |
|
"logps": -86.32205963134766, |
|
"loss": 77.8925, |
|
"objective": 74.42088317871094, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.1866002231836319, |
|
"step": 450, |
|
"wo_beta": 14.815221786499023 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.679172158241272, |
|
"eval_logits": -1.4745829105377197, |
|
"eval_logps": -92.70618438720703, |
|
"eval_loss": 179.24407958984375, |
|
"eval_objective": 175.84751892089844, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.39890119433403015, |
|
"eval_runtime": 369.0794, |
|
"eval_samples_per_second": 15.688, |
|
"eval_steps_per_second": 1.309, |
|
"eval_wo_beta": 16.526927947998047, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5642288327217102, |
|
"epoch": 1.2895606991025035, |
|
"grad_norm": 15540.821203538826, |
|
"learning_rate": 3.511804074514468e-07, |
|
"logits": -1.3849934339523315, |
|
"logps": -86.52748107910156, |
|
"loss": 78.8058, |
|
"objective": 79.82011413574219, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.18714289367198944, |
|
"step": 455, |
|
"wo_beta": 15.67411994934082 |
|
}, |
|
{ |
|
"dpo_loss": 0.5722388625144958, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 17894.656202069662, |
|
"learning_rate": 3.4738675173325007e-07, |
|
"logits": -1.5175042152404785, |
|
"logps": -86.22012329101562, |
|
"loss": 80.887, |
|
"objective": 79.5283432006836, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.18461202085018158, |
|
"step": 460, |
|
"wo_beta": 16.33133316040039 |
|
}, |
|
{ |
|
"dpo_loss": 0.5584803819656372, |
|
"epoch": 1.3179026924893718, |
|
"grad_norm": 15804.388771620323, |
|
"learning_rate": 3.4356647144953003e-07, |
|
"logits": -1.4979623556137085, |
|
"logps": -84.8365478515625, |
|
"loss": 78.4995, |
|
"objective": 80.0457763671875, |
|
"ranking_idealized": 0.5541666746139526, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.19293205440044403, |
|
"step": 465, |
|
"wo_beta": 14.855480194091797 |
|
}, |
|
{ |
|
"dpo_loss": 0.5635024309158325, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 15763.736551798547, |
|
"learning_rate": 3.3972061102677124e-07, |
|
"logits": -1.5794017314910889, |
|
"logps": -83.71866607666016, |
|
"loss": 80.6678, |
|
"objective": 83.6802749633789, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.18187165260314941, |
|
"step": 470, |
|
"wo_beta": 14.902677536010742 |
|
}, |
|
{ |
|
"dpo_loss": 0.5684855580329895, |
|
"epoch": 1.34624468587624, |
|
"grad_norm": 16747.760130279752, |
|
"learning_rate": 3.3585022188481246e-07, |
|
"logits": -1.45767343044281, |
|
"logps": -85.98019409179688, |
|
"loss": 70.8929, |
|
"objective": 75.84042358398438, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.19827060401439667, |
|
"step": 475, |
|
"wo_beta": 16.622520446777344 |
|
}, |
|
{ |
|
"dpo_loss": 0.5594444274902344, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 15233.336787247383, |
|
"learning_rate": 3.3195636214939935e-07, |
|
"logits": -1.5256932973861694, |
|
"logps": -86.3495101928711, |
|
"loss": 72.525, |
|
"objective": 74.22408294677734, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.1702665388584137, |
|
"step": 480, |
|
"wo_beta": 17.96377182006836 |
|
}, |
|
{ |
|
"dpo_loss": 0.5718420743942261, |
|
"epoch": 1.3745866792631083, |
|
"grad_norm": 16631.30941131418, |
|
"learning_rate": 3.2804009636290396e-07, |
|
"logits": -1.5204293727874756, |
|
"logps": -84.49634552001953, |
|
"loss": 76.2635, |
|
"objective": 74.72454071044922, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.18596205115318298, |
|
"step": 485, |
|
"wo_beta": 16.379438400268555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5699793696403503, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 18558.655175644297, |
|
"learning_rate": 3.241024951932884e-07, |
|
"logits": -1.4783555269241333, |
|
"logps": -86.9972915649414, |
|
"loss": 75.5371, |
|
"objective": 76.1517105102539, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.18451951444149017, |
|
"step": 490, |
|
"wo_beta": 15.841702461242676 |
|
}, |
|
{ |
|
"dpo_loss": 0.5529130101203918, |
|
"epoch": 1.4029286726499763, |
|
"grad_norm": 20634.943800946956, |
|
"learning_rate": 3.201446351413958e-07, |
|
"logits": -1.4992899894714355, |
|
"logps": -87.463623046875, |
|
"loss": 79.5849, |
|
"objective": 71.89769744873047, |
|
"ranking_idealized": 0.5583333373069763, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.17148242890834808, |
|
"step": 495, |
|
"wo_beta": 15.643444061279297 |
|
}, |
|
{ |
|
"dpo_loss": 0.5567610859870911, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 17182.106354079515, |
|
"learning_rate": 3.161675982466454e-07, |
|
"logits": -1.4364333152770996, |
|
"logps": -86.2386703491211, |
|
"loss": 73.5844, |
|
"objective": 70.71806335449219, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.16988083720207214, |
|
"step": 500, |
|
"wo_beta": 15.247078895568848 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.6786399483680725, |
|
"eval_logits": -1.4849432706832886, |
|
"eval_logps": -93.26954650878906, |
|
"eval_loss": 180.36428833007812, |
|
"eval_objective": 176.233154296875, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5305383205413818, |
|
"eval_regularize": 0.3994362950325012, |
|
"eval_runtime": 373.0338, |
|
"eval_samples_per_second": 15.521, |
|
"eval_steps_per_second": 1.295, |
|
"eval_wo_beta": 16.500259399414062, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5469445586204529, |
|
"epoch": 1.4312706660368446, |
|
"grad_norm": 16377.98808062248, |
|
"learning_rate": 3.121724717912138e-07, |
|
"logits": -1.507896900177002, |
|
"logps": -86.27371978759766, |
|
"loss": 81.0237, |
|
"objective": 78.97586822509766, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.17829085886478424, |
|
"step": 505, |
|
"wo_beta": 16.57958984375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5666388273239136, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 16987.7779441374, |
|
"learning_rate": 3.081603480027826e-07, |
|
"logits": -1.5229469537734985, |
|
"logps": -86.77935028076172, |
|
"loss": 72.4223, |
|
"objective": 68.7424087524414, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.16074414551258087, |
|
"step": 510, |
|
"wo_beta": 15.368119239807129 |
|
}, |
|
{ |
|
"dpo_loss": 0.5596444606781006, |
|
"epoch": 1.4596126594237129, |
|
"grad_norm": 18192.200248438854, |
|
"learning_rate": 3.0413232375593494e-07, |
|
"logits": -1.581657886505127, |
|
"logps": -87.4643783569336, |
|
"loss": 75.4207, |
|
"objective": 79.91267395019531, |
|
"ranking_idealized": 0.550000011920929, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.19809895753860474, |
|
"step": 515, |
|
"wo_beta": 16.213024139404297 |
|
}, |
|
{ |
|
"dpo_loss": 0.5609327554702759, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 17307.001345557863, |
|
"learning_rate": 3.000895002722803e-07, |
|
"logits": -1.472069501876831, |
|
"logps": -84.53739929199219, |
|
"loss": 73.445, |
|
"objective": 75.21688079833984, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.17492397129535675, |
|
"step": 520, |
|
"wo_beta": 15.897907257080078 |
|
}, |
|
{ |
|
"dpo_loss": 0.5437408685684204, |
|
"epoch": 1.487954652810581, |
|
"grad_norm": 16071.775164367858, |
|
"learning_rate": 2.960329828193918e-07, |
|
"logits": -1.3647209405899048, |
|
"logps": -83.83393859863281, |
|
"loss": 80.3729, |
|
"objective": 83.74950408935547, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.1921227127313614, |
|
"step": 525, |
|
"wo_beta": 15.854673385620117 |
|
}, |
|
{ |
|
"dpo_loss": 0.5658655762672424, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 16145.860622049926, |
|
"learning_rate": 2.919638804086369e-07, |
|
"logits": -1.5306588411331177, |
|
"logps": -86.52985382080078, |
|
"loss": 72.6051, |
|
"objective": 74.99629211425781, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.16594384610652924, |
|
"step": 530, |
|
"wo_beta": 15.360599517822266 |
|
}, |
|
{ |
|
"dpo_loss": 0.5535832643508911, |
|
"epoch": 1.5162966461974492, |
|
"grad_norm": 16070.421212133448, |
|
"learning_rate": 2.878833054919851e-07, |
|
"logits": -1.5020090341567993, |
|
"logps": -85.10139465332031, |
|
"loss": 76.8492, |
|
"objective": 71.94368743896484, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.17868037521839142, |
|
"step": 535, |
|
"wo_beta": 15.670897483825684 |
|
}, |
|
{ |
|
"dpo_loss": 0.5716097354888916, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 17086.3623131816, |
|
"learning_rate": 2.8379237365787425e-07, |
|
"logits": -1.4154467582702637, |
|
"logps": -85.44342803955078, |
|
"loss": 72.6152, |
|
"objective": 71.88426208496094, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.16814741492271423, |
|
"step": 540, |
|
"wo_beta": 15.268050193786621 |
|
}, |
|
{ |
|
"dpo_loss": 0.5665360689163208, |
|
"epoch": 1.5446386395843175, |
|
"grad_norm": 15014.903555178817, |
|
"learning_rate": 2.7969220332622e-07, |
|
"logits": -1.5325461626052856, |
|
"logps": -86.13166046142578, |
|
"loss": 71.8279, |
|
"objective": 66.47465515136719, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5874999761581421, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.15991279482841492, |
|
"step": 545, |
|
"wo_beta": 15.164175033569336 |
|
}, |
|
{ |
|
"dpo_loss": 0.5597227811813354, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 15652.670268373007, |
|
"learning_rate": 2.7558391544265126e-07, |
|
"logits": -1.5225752592086792, |
|
"logps": -86.28731536865234, |
|
"loss": 74.752, |
|
"objective": 75.39783477783203, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.1811082661151886, |
|
"step": 550, |
|
"wo_beta": 15.459908485412598 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.6795263886451721, |
|
"eval_logits": -1.4832454919815063, |
|
"eval_logps": -92.88924407958984, |
|
"eval_loss": 181.36459350585938, |
|
"eval_objective": 177.22666931152344, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.40197721123695374, |
|
"eval_runtime": 369.092, |
|
"eval_samples_per_second": 15.687, |
|
"eval_steps_per_second": 1.309, |
|
"eval_wo_beta": 16.55462646484375, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5626943707466125, |
|
"epoch": 1.5729806329711855, |
|
"grad_norm": 18195.881077955793, |
|
"learning_rate": 2.7146863317205425e-07, |
|
"logits": -1.3379462957382202, |
|
"logps": -86.09709930419922, |
|
"loss": 77.0693, |
|
"objective": 73.5696792602539, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.1626834124326706, |
|
"step": 555, |
|
"wo_beta": 14.696831703186035 |
|
}, |
|
{ |
|
"dpo_loss": 0.552921712398529, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 17118.412791156054, |
|
"learning_rate": 2.67347481591511e-07, |
|
"logits": -1.4245628118515015, |
|
"logps": -84.61250305175781, |
|
"loss": 71.0964, |
|
"objective": 72.3261489868164, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.1769203245639801, |
|
"step": 560, |
|
"wo_beta": 16.116395950317383 |
|
}, |
|
{ |
|
"dpo_loss": 0.5479990243911743, |
|
"epoch": 1.601322626358054, |
|
"grad_norm": 17861.734640053382, |
|
"learning_rate": 2.6322158738271414e-07, |
|
"logits": -1.3948200941085815, |
|
"logps": -85.63233184814453, |
|
"loss": 69.8501, |
|
"objective": 66.04268646240234, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.44999998807907104, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.1456415057182312, |
|
"step": 565, |
|
"wo_beta": 15.080788612365723 |
|
}, |
|
{ |
|
"dpo_loss": 0.5674367547035217, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 17710.317360917932, |
|
"learning_rate": 2.590920785239436e-07, |
|
"logits": -1.5569151639938354, |
|
"logps": -86.57015228271484, |
|
"loss": 67.9067, |
|
"objective": 68.46839904785156, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.16210322082042694, |
|
"step": 570, |
|
"wo_beta": 15.696702003479004 |
|
}, |
|
{ |
|
"dpo_loss": 0.5638484358787537, |
|
"epoch": 1.629664619744922, |
|
"grad_norm": 18073.751646088458, |
|
"learning_rate": 2.549600839816884e-07, |
|
"logits": -1.5135074853897095, |
|
"logps": -84.736328125, |
|
"loss": 72.0534, |
|
"objective": 84.00183868408203, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1912422776222229, |
|
"step": 575, |
|
"wo_beta": 16.294113159179688 |
|
}, |
|
{ |
|
"dpo_loss": 0.5543667078018188, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 16118.144699864974, |
|
"learning_rate": 2.508267334019988e-07, |
|
"logits": -1.5415210723876953, |
|
"logps": -84.3241195678711, |
|
"loss": 67.4175, |
|
"objective": 66.42247009277344, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.1573016196489334, |
|
"step": 580, |
|
"wo_beta": 14.898622512817383 |
|
}, |
|
{ |
|
"dpo_loss": 0.5663701295852661, |
|
"epoch": 1.6580066131317903, |
|
"grad_norm": 17666.121211791866, |
|
"learning_rate": 2.4669315680165195e-07, |
|
"logits": -1.3956176042556763, |
|
"logps": -84.12041473388672, |
|
"loss": 66.3727, |
|
"objective": 61.66792678833008, |
|
"ranking_idealized": 0.5333333611488342, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.5083333253860474, |
|
"regularize": 0.15171830356121063, |
|
"step": 585, |
|
"wo_beta": 15.958919525146484 |
|
}, |
|
{ |
|
"dpo_loss": 0.558569610118866, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 16850.29970069498, |
|
"learning_rate": 2.425604842592169e-07, |
|
"logits": -1.4625413417816162, |
|
"logps": -84.04219055175781, |
|
"loss": 74.8738, |
|
"objective": 81.01467895507812, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.18167801201343536, |
|
"step": 590, |
|
"wo_beta": 13.437705039978027 |
|
}, |
|
{ |
|
"dpo_loss": 0.5554817914962769, |
|
"epoch": 1.6863486065186586, |
|
"grad_norm": 17084.24865418485, |
|
"learning_rate": 2.384298456061022e-07, |
|
"logits": -1.4085568189620972, |
|
"logps": -84.96867370605469, |
|
"loss": 69.8306, |
|
"objective": 80.21109008789062, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.18683354556560516, |
|
"step": 595, |
|
"wo_beta": 17.37420654296875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5489537119865417, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 15218.152775914608, |
|
"learning_rate": 2.3430237011767164e-07, |
|
"logits": -1.4603925943374634, |
|
"logps": -85.71869659423828, |
|
"loss": 66.606, |
|
"objective": 76.13477325439453, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.17256483435630798, |
|
"step": 600, |
|
"wo_beta": 16.145771026611328 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.6788755655288696, |
|
"eval_logits": -1.4675469398498535, |
|
"eval_logps": -91.61579895019531, |
|
"eval_loss": 179.49525451660156, |
|
"eval_objective": 176.2792510986328, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5310559272766113, |
|
"eval_regularize": 0.3999297320842743, |
|
"eval_runtime": 368.9049, |
|
"eval_samples_per_second": 15.695, |
|
"eval_steps_per_second": 1.309, |
|
"eval_wo_beta": 16.6182804107666, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.564985454082489, |
|
"epoch": 1.7146905999055266, |
|
"grad_norm": 16714.450551200658, |
|
"learning_rate": 2.30179186204511e-07, |
|
"logits": -1.4053993225097656, |
|
"logps": -86.0546646118164, |
|
"loss": 69.0228, |
|
"objective": 73.44601440429688, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.17200763523578644, |
|
"step": 605, |
|
"wo_beta": 18.306556701660156 |
|
}, |
|
{ |
|
"dpo_loss": 0.5740829706192017, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 18689.625067588473, |
|
"learning_rate": 2.2606142110393245e-07, |
|
"logits": -1.4901000261306763, |
|
"logps": -84.50035858154297, |
|
"loss": 69.9038, |
|
"objective": 70.67005920410156, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.1648392379283905, |
|
"step": 610, |
|
"wo_beta": 15.994599342346191 |
|
}, |
|
{ |
|
"dpo_loss": 0.5698901414871216, |
|
"epoch": 1.743032593292395, |
|
"grad_norm": 17284.824092388248, |
|
"learning_rate": 2.2195020057179894e-07, |
|
"logits": -1.4990768432617188, |
|
"logps": -84.69489288330078, |
|
"loss": 69.1858, |
|
"objective": 62.69770431518555, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.1527046412229538, |
|
"step": 615, |
|
"wo_beta": 15.638397216796875 |
|
}, |
|
{ |
|
"dpo_loss": 0.553841233253479, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 16362.729415329737, |
|
"learning_rate": 2.1784664857475352e-07, |
|
"logits": -1.54779851436615, |
|
"logps": -84.9485092163086, |
|
"loss": 69.826, |
|
"objective": 70.27579498291016, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.1771712750196457, |
|
"step": 620, |
|
"wo_beta": 16.527475357055664 |
|
}, |
|
{ |
|
"dpo_loss": 0.5640650987625122, |
|
"epoch": 1.7713745866792632, |
|
"grad_norm": 16519.00259049995, |
|
"learning_rate": 2.1375188698293854e-07, |
|
"logits": -1.456007480621338, |
|
"logps": -83.91544342041016, |
|
"loss": 67.7564, |
|
"objective": 73.47472381591797, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.16373471915721893, |
|
"step": 625, |
|
"wo_beta": 15.541656494140625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5638567805290222, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 16003.013277681921, |
|
"learning_rate": 2.0966703526328726e-07, |
|
"logits": -1.4914802312850952, |
|
"logps": -85.642822265625, |
|
"loss": 68.6768, |
|
"objective": 65.9126205444336, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.15695922076702118, |
|
"step": 630, |
|
"wo_beta": 16.262632369995117 |
|
}, |
|
{ |
|
"dpo_loss": 0.5648781657218933, |
|
"epoch": 1.7997165800661312, |
|
"grad_norm": 17168.862791414114, |
|
"learning_rate": 2.0559321017347282e-07, |
|
"logits": -1.5868287086486816, |
|
"logps": -84.44722747802734, |
|
"loss": 63.1038, |
|
"objective": 59.76757049560547, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.15224316716194153, |
|
"step": 635, |
|
"wo_beta": 17.01032829284668 |
|
}, |
|
{ |
|
"dpo_loss": 0.5457783341407776, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 17993.04524523219, |
|
"learning_rate": 2.0153152545659796e-07, |
|
"logits": -1.5063692331314087, |
|
"logps": -85.88375091552734, |
|
"loss": 66.3807, |
|
"objective": 64.99014282226562, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.15310163795948029, |
|
"step": 640, |
|
"wo_beta": 17.52164649963379 |
|
}, |
|
{ |
|
"dpo_loss": 0.5509151220321655, |
|
"epoch": 1.8280585734529995, |
|
"grad_norm": 19625.243857455473, |
|
"learning_rate": 1.9748309153670856e-07, |
|
"logits": -1.5516611337661743, |
|
"logps": -86.286376953125, |
|
"loss": 65.9614, |
|
"objective": 64.1622543334961, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.1483081430196762, |
|
"step": 645, |
|
"wo_beta": 16.739274978637695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5534842014312744, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 17668.158898193855, |
|
"learning_rate": 1.9344901521521498e-07, |
|
"logits": -1.6079561710357666, |
|
"logps": -86.36868286132812, |
|
"loss": 65.4503, |
|
"objective": 62.571048736572266, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.14387734234333038, |
|
"step": 650, |
|
"wo_beta": 15.527718544006348 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.6790141463279724, |
|
"eval_logits": -1.504582166671753, |
|
"eval_logps": -91.89739990234375, |
|
"eval_loss": 180.12484741210938, |
|
"eval_objective": 176.55528259277344, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5284678936004639, |
|
"eval_regularize": 0.4002520442008972, |
|
"eval_runtime": 372.2243, |
|
"eval_samples_per_second": 15.555, |
|
"eval_steps_per_second": 1.298, |
|
"eval_wo_beta": 16.537281036376953, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5599731206893921, |
|
"epoch": 1.8564005668398678, |
|
"grad_norm": 18359.19761584605, |
|
"learning_rate": 1.8943039936830344e-07, |
|
"logits": -1.4786803722381592, |
|
"logps": -83.48601531982422, |
|
"loss": 69.3454, |
|
"objective": 74.32158660888672, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.1768590807914734, |
|
"step": 655, |
|
"wo_beta": 15.510722160339355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5625216960906982, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 16101.25212298875, |
|
"learning_rate": 1.854283426454209e-07, |
|
"logits": -1.545279622077942, |
|
"logps": -86.23809051513672, |
|
"loss": 65.0768, |
|
"objective": 58.69490432739258, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.14343543350696564, |
|
"step": 660, |
|
"wo_beta": 17.122419357299805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5581023097038269, |
|
"epoch": 1.8847425602267358, |
|
"grad_norm": 16788.328924690355, |
|
"learning_rate": 1.8144393916891508e-07, |
|
"logits": -1.526328206062317, |
|
"logps": -85.24169921875, |
|
"loss": 63.7541, |
|
"objective": 67.25133514404297, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.14928290247917175, |
|
"step": 665, |
|
"wo_beta": 15.62762451171875 |
|
}, |
|
{ |
|
"dpo_loss": 0.553265392780304, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 18214.847289885216, |
|
"learning_rate": 1.7747827823491252e-07, |
|
"logits": -1.4548065662384033, |
|
"logps": -83.87257385253906, |
|
"loss": 57.5593, |
|
"objective": 56.684471130371094, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.14734616875648499, |
|
"step": 670, |
|
"wo_beta": 16.462730407714844 |
|
}, |
|
{ |
|
"dpo_loss": 0.5662250518798828, |
|
"epoch": 1.9130845536136043, |
|
"grad_norm": 16267.920642791076, |
|
"learning_rate": 1.7353244401551565e-07, |
|
"logits": -1.478503704071045, |
|
"logps": -84.67176818847656, |
|
"loss": 60.0148, |
|
"objective": 62.5386962890625, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.14488257467746735, |
|
"step": 675, |
|
"wo_beta": 15.750804901123047 |
|
}, |
|
{ |
|
"dpo_loss": 0.5515182018280029, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 17294.380974269086, |
|
"learning_rate": 1.6960751526240118e-07, |
|
"logits": -1.5540010929107666, |
|
"logps": -86.8657455444336, |
|
"loss": 60.2369, |
|
"objective": 55.505897521972656, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.13418342173099518, |
|
"step": 680, |
|
"wo_beta": 15.37994384765625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5534944534301758, |
|
"epoch": 1.9414265470004723, |
|
"grad_norm": 17684.725981514333, |
|
"learning_rate": 1.6570456501189994e-07, |
|
"logits": -1.4706988334655762, |
|
"logps": -84.12257385253906, |
|
"loss": 60.967, |
|
"objective": 55.13488006591797, |
|
"ranking_idealized": 0.5291666388511658, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5291666388511658, |
|
"regularize": 0.13964900374412537, |
|
"step": 685, |
|
"wo_beta": 15.740229606628418 |
|
}, |
|
{ |
|
"dpo_loss": 0.5616536736488342, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 17181.452666650763, |
|
"learning_rate": 1.618246602916397e-07, |
|
"logits": -1.547702670097351, |
|
"logps": -84.80847930908203, |
|
"loss": 56.3999, |
|
"objective": 58.78403854370117, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.13416926562786102, |
|
"step": 690, |
|
"wo_beta": 16.004222869873047 |
|
}, |
|
{ |
|
"dpo_loss": 0.5623855590820312, |
|
"epoch": 1.9697685403873406, |
|
"grad_norm": 15698.603949750828, |
|
"learning_rate": 1.579688618288305e-07, |
|
"logits": -1.4090545177459717, |
|
"logps": -85.53604888916016, |
|
"loss": 57.3939, |
|
"objective": 54.246089935302734, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.12334556132555008, |
|
"step": 695, |
|
"wo_beta": 16.506813049316406 |
|
}, |
|
{ |
|
"dpo_loss": 0.5531891584396362, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 17232.305142451998, |
|
"learning_rate": 1.541382237602721e-07, |
|
"logits": -1.3992184400558472, |
|
"logps": -86.07640075683594, |
|
"loss": 62.3615, |
|
"objective": 63.35702896118164, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.14704561233520508, |
|
"step": 700, |
|
"wo_beta": 15.364715576171875 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.678434431552887, |
|
"eval_logits": -1.4984484910964966, |
|
"eval_logps": -91.58750915527344, |
|
"eval_loss": 179.3856658935547, |
|
"eval_objective": 176.00213623046875, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5300207138061523, |
|
"eval_regularize": 0.3992062211036682, |
|
"eval_runtime": 368.882, |
|
"eval_samples_per_second": 15.696, |
|
"eval_steps_per_second": 1.309, |
|
"eval_wo_beta": 16.5863037109375, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5491320490837097, |
|
"epoch": 1.9981105337742089, |
|
"grad_norm": 18325.752866553015, |
|
"learning_rate": 1.5033379334416375e-07, |
|
"logits": -1.3390460014343262, |
|
"logps": -84.89611053466797, |
|
"loss": 62.0311, |
|
"objective": 66.05867767333984, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.15552859008312225, |
|
"step": 705, |
|
"wo_beta": 17.203296661376953 |
|
}, |
|
{ |
|
"dpo_loss": 0.5555277466773987, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 16395.524810634437, |
|
"learning_rate": 1.465566106737942e-07, |
|
"logits": -1.4830571413040161, |
|
"logps": -84.74015045166016, |
|
"loss": 56.8191, |
|
"objective": 61.21573257446289, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1427960991859436, |
|
"step": 710, |
|
"wo_beta": 15.91169261932373 |
|
}, |
|
{ |
|
"dpo_loss": 0.5421245098114014, |
|
"epoch": 2.026452527161077, |
|
"grad_norm": 17457.779859055492, |
|
"learning_rate": 1.428077083931907e-07, |
|
"logits": -1.5156207084655762, |
|
"logps": -84.15880584716797, |
|
"loss": 51.1684, |
|
"objective": 52.4562873840332, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.12656886875629425, |
|
"step": 715, |
|
"wo_beta": 15.599425315856934 |
|
}, |
|
{ |
|
"dpo_loss": 0.5437305569648743, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 15758.315666253699, |
|
"learning_rate": 1.3908811141480406e-07, |
|
"logits": -1.4699770212173462, |
|
"logps": -84.7515869140625, |
|
"loss": 45.1211, |
|
"objective": 38.25383758544922, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.10041950643062592, |
|
"step": 720, |
|
"wo_beta": 16.25193214416504 |
|
}, |
|
{ |
|
"dpo_loss": 0.545520544052124, |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 16201.394561682804, |
|
"learning_rate": 1.353988366393083e-07, |
|
"logits": -1.5007617473602295, |
|
"logps": -86.95757293701172, |
|
"loss": 53.076, |
|
"objective": 52.1579704284668, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.12382372468709946, |
|
"step": 725, |
|
"wo_beta": 14.748201370239258 |
|
}, |
|
{ |
|
"dpo_loss": 0.5444363951683044, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 18420.501731961805, |
|
"learning_rate": 1.3174089267758982e-07, |
|
"logits": -1.57591712474823, |
|
"logps": -84.48290252685547, |
|
"loss": 52.1829, |
|
"objective": 51.44971466064453, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.12323012948036194, |
|
"step": 730, |
|
"wo_beta": 15.643149375915527 |
|
}, |
|
{ |
|
"dpo_loss": 0.5511536002159119, |
|
"epoch": 2.0831365139348135, |
|
"grad_norm": 15885.917813358059, |
|
"learning_rate": 1.2811527957500343e-07, |
|
"logits": -1.499257206916809, |
|
"logps": -84.50511169433594, |
|
"loss": 44.0743, |
|
"objective": 49.55210876464844, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.13047467172145844, |
|
"step": 735, |
|
"wo_beta": 17.085899353027344 |
|
}, |
|
{ |
|
"dpo_loss": 0.5526978969573975, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 16073.187372319342, |
|
"learning_rate": 1.245229885379699e-07, |
|
"logits": -1.5387953519821167, |
|
"logps": -84.2315444946289, |
|
"loss": 48.6885, |
|
"objective": 53.333866119384766, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.12818403542041779, |
|
"step": 740, |
|
"wo_beta": 15.79825496673584 |
|
}, |
|
{ |
|
"dpo_loss": 0.5457006692886353, |
|
"epoch": 2.1114785073216815, |
|
"grad_norm": 18439.02982973444, |
|
"learning_rate": 1.209650016629899e-07, |
|
"logits": -1.4960881471633911, |
|
"logps": -84.55073547363281, |
|
"loss": 49.7231, |
|
"objective": 46.95569610595703, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.11624018102884293, |
|
"step": 745, |
|
"wo_beta": 15.793415069580078 |
|
}, |
|
{ |
|
"dpo_loss": 0.5496144890785217, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 16557.264742662323, |
|
"learning_rate": 1.1744229166814886e-07, |
|
"logits": -1.511896014213562, |
|
"logps": -83.8011245727539, |
|
"loss": 48.9708, |
|
"objective": 45.016258239746094, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.11198277026414871, |
|
"step": 750, |
|
"wo_beta": 15.621644020080566 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.6794183254241943, |
|
"eval_logits": -1.491926908493042, |
|
"eval_logps": -92.19331359863281, |
|
"eval_loss": 179.8103485107422, |
|
"eval_objective": 176.70277404785156, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.4011208415031433, |
|
"eval_runtime": 369.337, |
|
"eval_samples_per_second": 15.677, |
|
"eval_steps_per_second": 1.308, |
|
"eval_wo_beta": 16.588436126708984, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5448576807975769, |
|
"epoch": 2.13982050070855, |
|
"grad_norm": 17193.17890323298, |
|
"learning_rate": 1.1395582162718523e-07, |
|
"logits": -1.4843658208847046, |
|
"logps": -86.97160339355469, |
|
"loss": 52.8805, |
|
"objective": 47.13671112060547, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.11303378641605377, |
|
"step": 755, |
|
"wo_beta": 16.85382080078125 |
|
}, |
|
{ |
|
"dpo_loss": 0.5693633556365967, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 16501.093616184942, |
|
"learning_rate": 1.10506544706196e-07, |
|
"logits": -1.4409741163253784, |
|
"logps": -83.32089233398438, |
|
"loss": 48.6534, |
|
"objective": 42.576026916503906, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.10865426063537598, |
|
"step": 760, |
|
"wo_beta": 14.651175498962402 |
|
}, |
|
{ |
|
"dpo_loss": 0.550613522529602, |
|
"epoch": 2.168162494095418, |
|
"grad_norm": 17100.049348037002, |
|
"learning_rate": 1.0709540390305061e-07, |
|
"logits": -1.4873898029327393, |
|
"logps": -84.55794525146484, |
|
"loss": 48.0396, |
|
"objective": 42.508872985839844, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.10587514191865921, |
|
"step": 765, |
|
"wo_beta": 16.372541427612305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5513295531272888, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 16090.026254449373, |
|
"learning_rate": 1.0372333178958462e-07, |
|
"logits": -1.5015202760696411, |
|
"logps": -84.9146499633789, |
|
"loss": 48.0411, |
|
"objective": 50.020198822021484, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.12123490124940872, |
|
"step": 770, |
|
"wo_beta": 14.4235200881958 |
|
}, |
|
{ |
|
"dpo_loss": 0.5526517629623413, |
|
"epoch": 2.196504487482286, |
|
"grad_norm": 16092.050129989755, |
|
"learning_rate": 1.0039125025664391e-07, |
|
"logits": -1.4631909132003784, |
|
"logps": -86.0343017578125, |
|
"loss": 50.4495, |
|
"objective": 48.98077392578125, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5791666507720947, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.12355396151542664, |
|
"step": 775, |
|
"wo_beta": 17.455713272094727 |
|
}, |
|
{ |
|
"dpo_loss": 0.5543543100357056, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 17672.341083343217, |
|
"learning_rate": 9.710007026204894e-08, |
|
"logits": -1.4037829637527466, |
|
"logps": -86.01419067382812, |
|
"loss": 48.6758, |
|
"objective": 49.57035446166992, |
|
"ranking_idealized": 0.512499988079071, |
|
"ranking_idealized_expo": 0.44583332538604736, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.1090199202299118, |
|
"step": 780, |
|
"wo_beta": 15.31540298461914 |
|
}, |
|
{ |
|
"dpo_loss": 0.5374571681022644, |
|
"epoch": 2.2248464808691546, |
|
"grad_norm": 17104.605257535815, |
|
"learning_rate": 9.385069158154805e-08, |
|
"logits": -1.4150718450546265, |
|
"logps": -85.85627746582031, |
|
"loss": 47.7928, |
|
"objective": 41.365787506103516, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.10406169295310974, |
|
"step": 785, |
|
"wo_beta": 15.260098457336426 |
|
}, |
|
{ |
|
"dpo_loss": 0.5317620635032654, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 16507.88081841276, |
|
"learning_rate": 9.064400256282755e-08, |
|
"logits": -1.399611234664917, |
|
"logps": -86.44268035888672, |
|
"loss": 52.3325, |
|
"objective": 45.95266342163086, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.1156383752822876, |
|
"step": 790, |
|
"wo_beta": 15.514897346496582 |
|
}, |
|
{ |
|
"dpo_loss": 0.5416150689125061, |
|
"epoch": 2.2531884742560226, |
|
"grad_norm": 14862.627085415834, |
|
"learning_rate": 8.748087988264668e-08, |
|
"logits": -1.4897602796554565, |
|
"logps": -87.291259765625, |
|
"loss": 50.1301, |
|
"objective": 47.57838439941406, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.12325119227170944, |
|
"step": 795, |
|
"wo_beta": 14.180596351623535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5349844694137573, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 16155.361379646765, |
|
"learning_rate": 8.436218830716258e-08, |
|
"logits": -1.5085468292236328, |
|
"logps": -86.47720336914062, |
|
"loss": 51.9463, |
|
"objective": 60.657657623291016, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.13889312744140625, |
|
"step": 800, |
|
"wo_beta": 14.652113914489746 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.6781792044639587, |
|
"eval_logits": -1.4993284940719604, |
|
"eval_logps": -92.00647735595703, |
|
"eval_loss": 179.21780395507812, |
|
"eval_objective": 175.7035675048828, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5289855003356934, |
|
"eval_regularize": 0.39864060282707214, |
|
"eval_runtime": 370.6492, |
|
"eval_samples_per_second": 15.621, |
|
"eval_steps_per_second": 1.303, |
|
"eval_wo_beta": 16.56892204284668, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5452725291252136, |
|
"epoch": 2.2815304676428907, |
|
"grad_norm": 16570.72383235664, |
|
"learning_rate": 8.1288780455512e-08, |
|
"logits": -1.5190993547439575, |
|
"logps": -84.90750122070312, |
|
"loss": 45.9105, |
|
"objective": 44.02467346191406, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.10332147032022476, |
|
"step": 805, |
|
"wo_beta": 16.12928581237793 |
|
}, |
|
{ |
|
"dpo_loss": 0.5472940802574158, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 17306.744795453895, |
|
"learning_rate": 7.826149656671385e-08, |
|
"logits": -1.6159324645996094, |
|
"logps": -84.2494888305664, |
|
"loss": 48.3513, |
|
"objective": 51.42966079711914, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.1283574104309082, |
|
"step": 810, |
|
"wo_beta": 14.84490966796875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5408957004547119, |
|
"epoch": 2.309872461029759, |
|
"grad_norm": 17714.10311614495, |
|
"learning_rate": 7.528116426995604e-08, |
|
"logits": -1.5414897203445435, |
|
"logps": -86.03276824951172, |
|
"loss": 44.4155, |
|
"objective": 42.15602493286133, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.09716067463159561, |
|
"step": 815, |
|
"wo_beta": 15.411224365234375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5434551239013672, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 16657.377741221597, |
|
"learning_rate": 7.234859835833021e-08, |
|
"logits": -1.4976943731307983, |
|
"logps": -85.22306060791016, |
|
"loss": 44.4464, |
|
"objective": 42.74842834472656, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.1011769101023674, |
|
"step": 820, |
|
"wo_beta": 15.093592643737793 |
|
}, |
|
{ |
|
"dpo_loss": 0.5428169965744019, |
|
"epoch": 2.3382144544166272, |
|
"grad_norm": 16447.115430947913, |
|
"learning_rate": 6.94646005660749e-08, |
|
"logits": -1.5226491689682007, |
|
"logps": -84.4798812866211, |
|
"loss": 45.0833, |
|
"objective": 45.554691314697266, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.11467791348695755, |
|
"step": 825, |
|
"wo_beta": 16.547521591186523 |
|
}, |
|
{ |
|
"dpo_loss": 0.5347627997398376, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 16335.000287824352, |
|
"learning_rate": 6.662995934939006e-08, |
|
"logits": -1.5204423666000366, |
|
"logps": -86.52505493164062, |
|
"loss": 49.1483, |
|
"objective": 49.5906867980957, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.11676025390625, |
|
"step": 830, |
|
"wo_beta": 16.500276565551758 |
|
}, |
|
{ |
|
"dpo_loss": 0.5479462742805481, |
|
"epoch": 2.3665564478034957, |
|
"grad_norm": 16959.398846741216, |
|
"learning_rate": 6.384544967088063e-08, |
|
"logits": -1.462269902229309, |
|
"logps": -86.40924835205078, |
|
"loss": 48.4639, |
|
"objective": 45.90879821777344, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.11642123758792877, |
|
"step": 835, |
|
"wo_beta": 15.002260208129883 |
|
}, |
|
{ |
|
"dpo_loss": 0.5614480376243591, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 15851.156516804678, |
|
"learning_rate": 6.111183278768955e-08, |
|
"logits": -1.4380650520324707, |
|
"logps": -86.09837341308594, |
|
"loss": 40.9965, |
|
"objective": 37.455379486083984, |
|
"ranking_idealized": 0.5249999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5, |
|
"regularize": 0.0973404198884964, |
|
"step": 840, |
|
"wo_beta": 15.861971855163574 |
|
}, |
|
{ |
|
"dpo_loss": 0.5543637871742249, |
|
"epoch": 2.3948984411903638, |
|
"grad_norm": 19702.171994535533, |
|
"learning_rate": 5.842985604337769e-08, |
|
"logits": -1.4723432064056396, |
|
"logps": -87.03990173339844, |
|
"loss": 47.1256, |
|
"objective": 46.43952941894531, |
|
"ranking_idealized": 0.5958333611488342, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.11250331997871399, |
|
"step": 845, |
|
"wo_beta": 16.375986099243164 |
|
}, |
|
{ |
|
"dpo_loss": 0.5473025441169739, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 16397.021305258124, |
|
"learning_rate": 5.5800252663607636e-08, |
|
"logits": -1.509826898574829, |
|
"logps": -85.1669692993164, |
|
"loss": 44.3463, |
|
"objective": 44.0050048828125, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.10994389653205872, |
|
"step": 850, |
|
"wo_beta": 15.741286277770996 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.6782696843147278, |
|
"eval_logits": -1.4917659759521484, |
|
"eval_logps": -92.23719787597656, |
|
"eval_loss": 179.17352294921875, |
|
"eval_objective": 175.77769470214844, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5284678936004639, |
|
"eval_regularize": 0.39879217743873596, |
|
"eval_runtime": 368.8355, |
|
"eval_samples_per_second": 15.698, |
|
"eval_steps_per_second": 1.31, |
|
"eval_wo_beta": 16.56818962097168, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5434221625328064, |
|
"epoch": 2.423240434577232, |
|
"grad_norm": 14761.032913238774, |
|
"learning_rate": 5.3223741555686873e-08, |
|
"logits": -1.537110447883606, |
|
"logps": -84.45201110839844, |
|
"loss": 43.1862, |
|
"objective": 41.27051544189453, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.10142151266336441, |
|
"step": 855, |
|
"wo_beta": 15.314756393432617 |
|
}, |
|
{ |
|
"dpo_loss": 0.5529573559761047, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 16145.709635641595, |
|
"learning_rate": 5.070102711202606e-08, |
|
"logits": -1.4745042324066162, |
|
"logps": -85.98120880126953, |
|
"loss": 42.9052, |
|
"objective": 38.47713088989258, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.09888458997011185, |
|
"step": 860, |
|
"wo_beta": 16.20383644104004 |
|
}, |
|
{ |
|
"dpo_loss": 0.551279604434967, |
|
"epoch": 2.4515824279641003, |
|
"grad_norm": 18278.1439042312, |
|
"learning_rate": 4.8232799017564967e-08, |
|
"logits": -1.4951705932617188, |
|
"logps": -85.17224884033203, |
|
"loss": 44.7949, |
|
"objective": 45.912452697753906, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.11339430510997772, |
|
"step": 865, |
|
"wo_beta": 16.40469741821289 |
|
}, |
|
{ |
|
"dpo_loss": 0.5488017201423645, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 15873.838165857398, |
|
"learning_rate": 4.5819732061219475e-08, |
|
"logits": -1.5395283699035645, |
|
"logps": -85.87442779541016, |
|
"loss": 45.7505, |
|
"objective": 46.77009963989258, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.11627896875143051, |
|
"step": 870, |
|
"wo_beta": 15.758785247802734 |
|
}, |
|
{ |
|
"dpo_loss": 0.5411531329154968, |
|
"epoch": 2.4799244213509684, |
|
"grad_norm": 16103.251582257983, |
|
"learning_rate": 4.346248595140112e-08, |
|
"logits": -1.4675084352493286, |
|
"logps": -85.36338806152344, |
|
"loss": 46.2974, |
|
"objective": 47.864070892333984, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.11755504459142685, |
|
"step": 875, |
|
"wo_beta": 17.365110397338867 |
|
}, |
|
{ |
|
"dpo_loss": 0.5485495924949646, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 15694.189473425999, |
|
"learning_rate": 4.116170513565942e-08, |
|
"logits": -1.3954468965530396, |
|
"logps": -85.28124237060547, |
|
"loss": 41.7812, |
|
"objective": 37.13809585571289, |
|
"ranking_idealized": 0.5625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.0931786298751831, |
|
"step": 880, |
|
"wo_beta": 17.850427627563477 |
|
}, |
|
{ |
|
"dpo_loss": 0.5530834794044495, |
|
"epoch": 2.5082664147378364, |
|
"grad_norm": 17596.947495882203, |
|
"learning_rate": 3.8918018624496286e-08, |
|
"logits": -1.562106728553772, |
|
"logps": -84.21708679199219, |
|
"loss": 45.0999, |
|
"objective": 50.288021087646484, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.11681138724088669, |
|
"step": 885, |
|
"wo_beta": 17.1990909576416 |
|
}, |
|
{ |
|
"dpo_loss": 0.5438919067382812, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 17590.89973277564, |
|
"learning_rate": 3.673203981940068e-08, |
|
"logits": -1.4610990285873413, |
|
"logps": -83.57906341552734, |
|
"loss": 45.9865, |
|
"objective": 43.9241828918457, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.11596141010522842, |
|
"step": 890, |
|
"wo_beta": 14.2067232131958 |
|
}, |
|
{ |
|
"dpo_loss": 0.5475446581840515, |
|
"epoch": 2.536608408124705, |
|
"grad_norm": 16852.292477633127, |
|
"learning_rate": 3.46043663451511e-08, |
|
"logits": -1.456311583518982, |
|
"logps": -85.9644775390625, |
|
"loss": 40.1267, |
|
"objective": 40.91215133666992, |
|
"ranking_idealized": 0.5458333492279053, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.0973983108997345, |
|
"step": 895, |
|
"wo_beta": 16.769834518432617 |
|
}, |
|
{ |
|
"dpo_loss": 0.5658264756202698, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 16973.16020360833, |
|
"learning_rate": 3.2535579886430715e-08, |
|
"logits": -1.4089369773864746, |
|
"logps": -84.34557342529297, |
|
"loss": 44.3015, |
|
"objective": 48.17121124267578, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.10806681215763092, |
|
"step": 900, |
|
"wo_beta": 15.534666061401367 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.678382933139801, |
|
"eval_logits": -1.4982877969741821, |
|
"eval_logps": -92.18975830078125, |
|
"eval_loss": 179.15904235839844, |
|
"eval_objective": 175.8240203857422, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 0.3989744186401367, |
|
"eval_runtime": 368.6241, |
|
"eval_samples_per_second": 15.707, |
|
"eval_steps_per_second": 1.31, |
|
"eval_wo_beta": 16.590484619140625, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.5509156584739685, |
|
"epoch": 2.564950401511573, |
|
"grad_norm": 14234.779776824053, |
|
"learning_rate": 3.052624602880063e-08, |
|
"logits": -1.5034754276275635, |
|
"logps": -84.19306945800781, |
|
"loss": 40.4357, |
|
"objective": 38.76914978027344, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.09935871511697769, |
|
"step": 905, |
|
"wo_beta": 15.227039337158203 |
|
}, |
|
{ |
|
"dpo_loss": 0.5412671566009521, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 16823.804354079846, |
|
"learning_rate": 2.8576914104074423e-08, |
|
"logits": -1.4797313213348389, |
|
"logps": -87.3152084350586, |
|
"loss": 41.7421, |
|
"objective": 45.547401428222656, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.10913080722093582, |
|
"step": 910, |
|
"wo_beta": 16.365530014038086 |
|
}, |
|
{ |
|
"dpo_loss": 0.5502139329910278, |
|
"epoch": 2.593292394898441, |
|
"grad_norm": 15853.539663724245, |
|
"learning_rate": 2.668811704013646e-08, |
|
"logits": -1.5921828746795654, |
|
"logps": -86.05388641357422, |
|
"loss": 42.1486, |
|
"objective": 39.99686050415039, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.103940449655056, |
|
"step": 915, |
|
"wo_beta": 15.442140579223633 |
|
}, |
|
{ |
|
"dpo_loss": 0.5391423106193542, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 16570.329855367927, |
|
"learning_rate": 2.486037121524448e-08, |
|
"logits": -1.4353820085525513, |
|
"logps": -85.45365905761719, |
|
"loss": 46.2712, |
|
"objective": 45.818546295166016, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.10537134110927582, |
|
"step": 920, |
|
"wo_beta": 15.817992210388184 |
|
}, |
|
{ |
|
"dpo_loss": 0.5430201888084412, |
|
"epoch": 2.6216343882853095, |
|
"grad_norm": 16860.801218719196, |
|
"learning_rate": 2.3094176316856978e-08, |
|
"logits": -1.4627550840377808, |
|
"logps": -85.5040512084961, |
|
"loss": 41.8789, |
|
"objective": 39.82048797607422, |
|
"ranking_idealized": 0.574999988079071, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.10451699048280716, |
|
"step": 925, |
|
"wo_beta": 16.46214485168457 |
|
}, |
|
{ |
|
"dpo_loss": 0.5547136068344116, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 18548.498992469253, |
|
"learning_rate": 2.1390015205023898e-08, |
|
"logits": -1.4610332250595093, |
|
"logps": -85.3515625, |
|
"loss": 44.5914, |
|
"objective": 45.85152053833008, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.44583332538604736, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.11503276228904724, |
|
"step": 930, |
|
"wo_beta": 16.94695281982422 |
|
}, |
|
{ |
|
"dpo_loss": 0.5648698210716248, |
|
"epoch": 2.6499763816721775, |
|
"grad_norm": 16823.626472777796, |
|
"learning_rate": 1.974835378037723e-08, |
|
"logits": -1.4719030857086182, |
|
"logps": -84.22066497802734, |
|
"loss": 42.8827, |
|
"objective": 47.928733825683594, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.11680851131677628, |
|
"step": 935, |
|
"wo_beta": 16.337308883666992 |
|
}, |
|
{ |
|
"dpo_loss": 0.5417830944061279, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 19648.95461433399, |
|
"learning_rate": 1.816964085675865e-08, |
|
"logits": -1.50851309299469, |
|
"logps": -87.09917449951172, |
|
"loss": 44.1259, |
|
"objective": 46.947654724121094, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.112226702272892, |
|
"step": 940, |
|
"wo_beta": 17.42079734802246 |
|
}, |
|
{ |
|
"dpo_loss": 0.5409041047096252, |
|
"epoch": 2.678318375059046, |
|
"grad_norm": 17233.04175831224, |
|
"learning_rate": 1.6654308038518056e-08, |
|
"logits": -1.544434905052185, |
|
"logps": -85.57273864746094, |
|
"loss": 41.238, |
|
"objective": 48.42988967895508, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.11696865409612656, |
|
"step": 945, |
|
"wo_beta": 15.977194786071777 |
|
}, |
|
{ |
|
"dpo_loss": 0.554226815700531, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 17994.23017373139, |
|
"learning_rate": 1.520276960251751e-08, |
|
"logits": -1.4273337125778198, |
|
"logps": -85.7279281616211, |
|
"loss": 43.4164, |
|
"objective": 41.65528106689453, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1018596738576889, |
|
"step": 950, |
|
"wo_beta": 15.610276222229004 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.6785179376602173, |
|
"eval_logits": -1.4967025518417358, |
|
"eval_logps": -92.20464324951172, |
|
"eval_loss": 179.2801055908203, |
|
"eval_objective": 176.04083251953125, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5274327397346497, |
|
"eval_regularize": 0.39928528666496277, |
|
"eval_runtime": 370.2035, |
|
"eval_samples_per_second": 15.64, |
|
"eval_steps_per_second": 1.305, |
|
"eval_wo_beta": 16.589099884033203, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.5416913628578186, |
|
"epoch": 2.706660368445914, |
|
"grad_norm": 16913.4446377394, |
|
"learning_rate": 1.3815422384871878e-08, |
|
"logits": -1.5223019123077393, |
|
"logps": -85.46512603759766, |
|
"loss": 41.9154, |
|
"objective": 39.07767105102539, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.10648568719625473, |
|
"step": 955, |
|
"wo_beta": 14.056703567504883 |
|
}, |
|
{ |
|
"dpo_loss": 0.5538465976715088, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 16463.281607975867, |
|
"learning_rate": 1.2492645672457836e-08, |
|
"logits": -1.4985733032226562, |
|
"logps": -85.17173767089844, |
|
"loss": 42.6366, |
|
"objective": 43.174072265625, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.10360194742679596, |
|
"step": 960, |
|
"wo_beta": 16.368473052978516 |
|
}, |
|
{ |
|
"dpo_loss": 0.537990391254425, |
|
"epoch": 2.735002361832782, |
|
"grad_norm": 17961.612959662347, |
|
"learning_rate": 1.1234801099220786e-08, |
|
"logits": -1.509239912033081, |
|
"logps": -85.44644165039062, |
|
"loss": 39.0678, |
|
"objective": 39.20804977416992, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.10068784654140472, |
|
"step": 965, |
|
"wo_beta": 15.433809280395508 |
|
}, |
|
{ |
|
"dpo_loss": 0.5542954802513123, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 15088.900869572226, |
|
"learning_rate": 1.004223254730749e-08, |
|
"logits": -1.519822597503662, |
|
"logps": -86.41224670410156, |
|
"loss": 44.4091, |
|
"objective": 43.453468322753906, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.11093102395534515, |
|
"step": 970, |
|
"wo_beta": 15.656749725341797 |
|
}, |
|
{ |
|
"dpo_loss": 0.5459226965904236, |
|
"epoch": 2.7633443552196506, |
|
"grad_norm": 17863.309850864207, |
|
"learning_rate": 8.915266053052373e-09, |
|
"logits": -1.4553431272506714, |
|
"logps": -84.71710968017578, |
|
"loss": 44.5817, |
|
"objective": 54.85538101196289, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.13891781866550446, |
|
"step": 975, |
|
"wo_beta": 16.4742374420166 |
|
}, |
|
{ |
|
"dpo_loss": 0.5517151355743408, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 17532.917025188563, |
|
"learning_rate": 7.85420971784223e-09, |
|
"logits": -1.5741106271743774, |
|
"logps": -84.43816375732422, |
|
"loss": 49.1041, |
|
"objective": 50.19886016845703, |
|
"ranking_idealized": 0.5916666388511658, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.12199045717716217, |
|
"step": 980, |
|
"wo_beta": 14.642070770263672 |
|
}, |
|
{ |
|
"dpo_loss": 0.5505563020706177, |
|
"epoch": 2.7916863486065187, |
|
"grad_norm": 15695.994208260572, |
|
"learning_rate": 6.859353623884567e-09, |
|
"logits": -1.4095691442489624, |
|
"logps": -82.93616485595703, |
|
"loss": 43.0667, |
|
"objective": 50.173011779785156, |
|
"ranking_idealized": 0.5791666507720947, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.1220245286822319, |
|
"step": 985, |
|
"wo_beta": 14.67066764831543 |
|
}, |
|
{ |
|
"dpo_loss": 0.5449987649917603, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 15965.60549784291, |
|
"learning_rate": 5.930969754901843e-09, |
|
"logits": -1.484297275543213, |
|
"logps": -86.44190979003906, |
|
"loss": 44.9483, |
|
"objective": 41.374794006347656, |
|
"ranking_idealized": 0.5666666626930237, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.10469052940607071, |
|
"step": 990, |
|
"wo_beta": 16.099658966064453 |
|
}, |
|
{ |
|
"dpo_loss": 0.5509870648384094, |
|
"epoch": 2.820028341993387, |
|
"grad_norm": 15636.732404031713, |
|
"learning_rate": 5.069311921774039e-09, |
|
"logits": -1.531805396080017, |
|
"logps": -84.8103256225586, |
|
"loss": 43.9212, |
|
"objective": 41.92687225341797, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.11357491463422775, |
|
"step": 995, |
|
"wo_beta": 16.878124237060547 |
|
}, |
|
{ |
|
"dpo_loss": 0.544861376285553, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 15284.06322793647, |
|
"learning_rate": 4.274615693149075e-09, |
|
"logits": -1.4928451776504517, |
|
"logps": -83.65907287597656, |
|
"loss": 43.6009, |
|
"objective": 46.88129425048828, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.11148179322481155, |
|
"step": 1000, |
|
"wo_beta": 15.039312362670898 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.6785008311271667, |
|
"eval_logits": -1.4978208541870117, |
|
"eval_logps": -92.27050018310547, |
|
"eval_loss": 179.2790985107422, |
|
"eval_objective": 175.9962921142578, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 0.39921271800994873, |
|
"eval_runtime": 373.1717, |
|
"eval_samples_per_second": 15.516, |
|
"eval_steps_per_second": 1.294, |
|
"eval_wo_beta": 16.587968826293945, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.5515304803848267, |
|
"epoch": 2.848370335380255, |
|
"grad_norm": 16385.652650772114, |
|
"learning_rate": 3.547098331040915e-09, |
|
"logits": -1.482871413230896, |
|
"logps": -84.16107940673828, |
|
"loss": 41.5394, |
|
"objective": 43.54771041870117, |
|
"ranking_idealized": 0.6000000238418579, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.11037115752696991, |
|
"step": 1005, |
|
"wo_beta": 16.216983795166016 |
|
}, |
|
{ |
|
"dpo_loss": 0.5438867211341858, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 16774.77560364275, |
|
"learning_rate": 2.886958731432132e-09, |
|
"logits": -1.542305827140808, |
|
"logps": -84.64833068847656, |
|
"loss": 43.5186, |
|
"objective": 44.43704605102539, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.10451284050941467, |
|
"step": 1010, |
|
"wo_beta": 15.962403297424316 |
|
}, |
|
{ |
|
"dpo_loss": 0.5501060485839844, |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 17522.630643855955, |
|
"learning_rate": 2.294377369897793e-09, |
|
"logits": -1.4613019227981567, |
|
"logps": -83.6242446899414, |
|
"loss": 42.9096, |
|
"objective": 43.5313720703125, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.1068674772977829, |
|
"step": 1015, |
|
"wo_beta": 15.568625450134277 |
|
}, |
|
{ |
|
"dpo_loss": 0.549178957939148, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 16355.818495658159, |
|
"learning_rate": 1.769516252265235e-09, |
|
"logits": -1.4256434440612793, |
|
"logps": -85.48424530029297, |
|
"loss": 41.2726, |
|
"objective": 41.4518928527832, |
|
"ranking_idealized": 0.5833333134651184, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.0968986377120018, |
|
"step": 1020, |
|
"wo_beta": 18.603649139404297 |
|
}, |
|
{ |
|
"dpo_loss": 0.5602646470069885, |
|
"epoch": 2.9050543221539913, |
|
"grad_norm": 15838.312706923856, |
|
"learning_rate": 1.3125188703233814e-09, |
|
"logits": -1.5212275981903076, |
|
"logps": -85.22294616699219, |
|
"loss": 43.2553, |
|
"objective": 43.40156555175781, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.1083984375, |
|
"step": 1025, |
|
"wo_beta": 15.021879196166992 |
|
}, |
|
{ |
|
"dpo_loss": 0.5516722798347473, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 16291.39704850031, |
|
"learning_rate": 9.235101625932884e-10, |
|
"logits": -1.5682528018951416, |
|
"logps": -85.36617279052734, |
|
"loss": 43.7786, |
|
"objective": 39.36371612548828, |
|
"ranking_idealized": 0.5416666865348816, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.0955829992890358, |
|
"step": 1030, |
|
"wo_beta": 16.441747665405273 |
|
}, |
|
{ |
|
"dpo_loss": 0.5399314761161804, |
|
"epoch": 2.9333963155408598, |
|
"grad_norm": 16748.657990232314, |
|
"learning_rate": 6.025964801714411e-10, |
|
"logits": -1.5260014533996582, |
|
"logps": -86.03439331054688, |
|
"loss": 41.3554, |
|
"objective": 38.64834976196289, |
|
"ranking_idealized": 0.625, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.0944010317325592, |
|
"step": 1035, |
|
"wo_beta": 14.993240356445312 |
|
}, |
|
{ |
|
"dpo_loss": 0.5549695491790771, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 15029.578861688902, |
|
"learning_rate": 3.498655576543441e-10, |
|
"logits": -1.5111292600631714, |
|
"logps": -85.76802062988281, |
|
"loss": 38.8328, |
|
"objective": 38.22753143310547, |
|
"ranking_idealized": 0.5708333253860474, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.095832958817482, |
|
"step": 1040, |
|
"wo_beta": 16.434364318847656 |
|
}, |
|
{ |
|
"dpo_loss": 0.5555641651153564, |
|
"epoch": 2.961738308927728, |
|
"grad_norm": 17992.01977336534, |
|
"learning_rate": 1.6538648915270793e-10, |
|
"logits": -1.481310248374939, |
|
"logps": -87.40520477294922, |
|
"loss": 38.5959, |
|
"objective": 39.465171813964844, |
|
"ranking_idealized": 0.6208333373069763, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.10020165145397186, |
|
"step": 1045, |
|
"wo_beta": 18.04179573059082 |
|
}, |
|
{ |
|
"dpo_loss": 0.5507573485374451, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 17353.770623615765, |
|
"learning_rate": 4.920970940180957e-11, |
|
"logits": -1.522560715675354, |
|
"logps": -83.40792083740234, |
|
"loss": 47.7054, |
|
"objective": 48.42831802368164, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.121465764939785, |
|
"step": 1050, |
|
"wo_beta": 15.164950370788574 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"eval_dpo_loss": 0.6784854531288147, |
|
"eval_logits": -1.4974991083145142, |
|
"eval_logps": -92.26132202148438, |
|
"eval_loss": 179.26217651367188, |
|
"eval_objective": 175.9752197265625, |
|
"eval_ranking_idealized": 0.6024844646453857, |
|
"eval_ranking_idealized_expo": 0.5232919454574585, |
|
"eval_ranking_simple": 0.5279502868652344, |
|
"eval_regularize": 0.39915931224823, |
|
"eval_runtime": 370.7869, |
|
"eval_samples_per_second": 15.615, |
|
"eval_steps_per_second": 1.303, |
|
"eval_wo_beta": 16.585628509521484, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.5567707419395447, |
|
"epoch": 2.9900803023145963, |
|
"grad_norm": 17458.927279367348, |
|
"learning_rate": 1.3669799732163311e-12, |
|
"logits": -1.4982311725616455, |
|
"logps": -84.29004669189453, |
|
"loss": 47.3347, |
|
"objective": 49.85600662231445, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.12724058330059052, |
|
"step": 1055, |
|
"wo_beta": 15.180956840515137 |
|
}, |
|
{ |
|
"epoch": 2.992914501653283, |
|
"step": 1056, |
|
"total_flos": 0.0, |
|
"train_loss": 87.93023242011215, |
|
"train_runtime": 38512.4809, |
|
"train_samples_per_second": 3.957, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1056, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|