|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.992914501653283, |
|
"eval_steps": 50, |
|
"global_step": 1056, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.002834199338686821, |
|
"grad_norm": 26513.418063359753, |
|
"learning_rate": 4.716981132075472e-08, |
|
"logits": -1.2867579460144043, |
|
"logps": -84.34933471679688, |
|
"loss": 458.1349, |
|
"objective": 431.1807556152344, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3618059456348419, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6925470232963562, |
|
"epoch": 0.014170996693434105, |
|
"grad_norm": 25616.845196611706, |
|
"learning_rate": 2.358490566037736e-07, |
|
"logits": -1.4298049211502075, |
|
"logps": -83.76395416259766, |
|
"loss": 481.1315, |
|
"objective": 456.7127990722656, |
|
"ranking_simple": 0.4895833432674408, |
|
"regularize": 0.40281566977500916, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6908925175666809, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 25634.63923836155, |
|
"learning_rate": 4.716981132075472e-07, |
|
"logits": -1.3988193273544312, |
|
"logps": -84.28076171875, |
|
"loss": 465.4449, |
|
"objective": 457.4544982910156, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.4003960192203522, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6889466643333435, |
|
"epoch": 0.042512990080302314, |
|
"grad_norm": 25839.066108054885, |
|
"learning_rate": 7.075471698113208e-07, |
|
"logits": -1.5292092561721802, |
|
"logps": -83.47270202636719, |
|
"loss": 471.5689, |
|
"objective": 480.5885314941406, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.4446539282798767, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6812318563461304, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 27814.64921945046, |
|
"learning_rate": 9.433962264150944e-07, |
|
"logits": -1.368198275566101, |
|
"logps": -82.4678726196289, |
|
"loss": 477.3994, |
|
"objective": 495.29437255859375, |
|
"ranking_simple": 0.48750001192092896, |
|
"regularize": 0.43586504459381104, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.681931734085083, |
|
"epoch": 0.07085498346717052, |
|
"grad_norm": 23147.9179925769, |
|
"learning_rate": 1.179245283018868e-06, |
|
"logits": -1.3939018249511719, |
|
"logps": -82.79817962646484, |
|
"loss": 465.0164, |
|
"objective": 451.0166320800781, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.39363664388656616, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6791452765464783, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 20393.372937507134, |
|
"learning_rate": 1.4150943396226415e-06, |
|
"logits": -1.4756665229797363, |
|
"logps": -83.33882141113281, |
|
"loss": 460.6366, |
|
"objective": 463.89178466796875, |
|
"ranking_simple": 0.4833333194255829, |
|
"regularize": 0.4173465967178345, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6747857332229614, |
|
"epoch": 0.09919697685403873, |
|
"grad_norm": 23418.03953630638, |
|
"learning_rate": 1.650943396226415e-06, |
|
"logits": -1.4623119831085205, |
|
"logps": -81.88738250732422, |
|
"loss": 469.5609, |
|
"objective": 465.93438720703125, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.3794897198677063, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6748687028884888, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 23935.328029038144, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"logits": -1.4446643590927124, |
|
"logps": -82.75718688964844, |
|
"loss": 464.1251, |
|
"objective": 463.13604736328125, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.3904489278793335, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.6506677865982056, |
|
"epoch": 0.12753897024090693, |
|
"grad_norm": 21652.885906992553, |
|
"learning_rate": 2.1226415094339624e-06, |
|
"logits": -1.4655033349990845, |
|
"logps": -82.44393157958984, |
|
"loss": 449.3847, |
|
"objective": 443.1551818847656, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.39696159958839417, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.6506399512290955, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 21470.261560892, |
|
"learning_rate": 2.358490566037736e-06, |
|
"logits": -1.4927986860275269, |
|
"logps": -86.20177459716797, |
|
"loss": 470.2434, |
|
"objective": 469.0955810546875, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.4380335211753845, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14170996693434104, |
|
"eval_dpo_loss": 0.6872959136962891, |
|
"eval_logits": -1.4968135356903076, |
|
"eval_logps": -94.33769226074219, |
|
"eval_loss": 491.6701965332031, |
|
"eval_objective": 488.937255859375, |
|
"eval_ranking_simple": 0.5269151329994202, |
|
"eval_regularize": 0.4298084080219269, |
|
"eval_runtime": 367.8725, |
|
"eval_samples_per_second": 15.739, |
|
"eval_steps_per_second": 1.313, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6506890654563904, |
|
"epoch": 0.15588096362777515, |
|
"grad_norm": 19949.76098202467, |
|
"learning_rate": 2.5943396226415095e-06, |
|
"logits": -1.470070719718933, |
|
"logps": -87.6925277709961, |
|
"loss": 445.3158, |
|
"objective": 479.81256103515625, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.3883524239063263, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.644140362739563, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 19296.57503340127, |
|
"learning_rate": 2.830188679245283e-06, |
|
"logits": -1.3877280950546265, |
|
"logps": -84.86709594726562, |
|
"loss": 439.9873, |
|
"objective": 439.1716613769531, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.4104911684989929, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6160763502120972, |
|
"epoch": 0.18422295701464336, |
|
"grad_norm": 19281.315668093266, |
|
"learning_rate": 3.0660377358490567e-06, |
|
"logits": -1.431338906288147, |
|
"logps": -79.60600280761719, |
|
"loss": 440.686, |
|
"objective": 418.4104309082031, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.36962300539016724, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.6181739568710327, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 18809.239218176448, |
|
"learning_rate": 3.30188679245283e-06, |
|
"logits": -1.4349617958068848, |
|
"logps": -82.71049499511719, |
|
"loss": 439.5741, |
|
"objective": 443.4054870605469, |
|
"ranking_simple": 0.550000011920929, |
|
"regularize": 0.4068828225135803, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.6287192702293396, |
|
"epoch": 0.21256495040151158, |
|
"grad_norm": 20616.517476343855, |
|
"learning_rate": 3.5377358490566038e-06, |
|
"logits": -1.333349585533142, |
|
"logps": -82.84730529785156, |
|
"loss": 457.74, |
|
"objective": 467.5802307128906, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.42490166425704956, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.6181908249855042, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 20195.88685314973, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"logits": -1.3842099905014038, |
|
"logps": -80.95723724365234, |
|
"loss": 435.7331, |
|
"objective": 441.63299560546875, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.42700326442718506, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.6087173819541931, |
|
"epoch": 0.2409069437883798, |
|
"grad_norm": 15942.080314795965, |
|
"learning_rate": 4.009433962264152e-06, |
|
"logits": -1.344446063041687, |
|
"logps": -80.02598571777344, |
|
"loss": 443.4859, |
|
"objective": 464.4291076660156, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.45569831132888794, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.5819770693778992, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 16092.557140676014, |
|
"learning_rate": 4.245283018867925e-06, |
|
"logits": -1.4104372262954712, |
|
"logps": -81.34741973876953, |
|
"loss": 449.1501, |
|
"objective": 444.44000244140625, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.4178715646266937, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.5937064290046692, |
|
"epoch": 0.269248937175248, |
|
"grad_norm": 16918.089764873057, |
|
"learning_rate": 4.481132075471699e-06, |
|
"logits": -1.3456578254699707, |
|
"logps": -83.11612701416016, |
|
"loss": 458.477, |
|
"objective": 447.3981018066406, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.41778501868247986, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.6062127351760864, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 16311.430045391364, |
|
"learning_rate": 4.716981132075472e-06, |
|
"logits": -1.3139296770095825, |
|
"logps": -79.3995132446289, |
|
"loss": 444.0833, |
|
"objective": 441.1212463378906, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.4033554494380951, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6839331984519958, |
|
"eval_logits": -1.4344693422317505, |
|
"eval_logps": -85.09794616699219, |
|
"eval_loss": 519.0431518554688, |
|
"eval_objective": 504.62091064453125, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 0.4692068099975586, |
|
"eval_runtime": 368.2123, |
|
"eval_samples_per_second": 15.725, |
|
"eval_steps_per_second": 1.312, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.6278888583183289, |
|
"epoch": 0.2975909305621162, |
|
"grad_norm": 16121.92097886621, |
|
"learning_rate": 4.952830188679246e-06, |
|
"logits": -1.4997217655181885, |
|
"logps": -80.34195709228516, |
|
"loss": 464.4937, |
|
"objective": 497.4685363769531, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.5123757123947144, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.6059397459030151, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 14461.937244168814, |
|
"learning_rate": 4.999781286194085e-06, |
|
"logits": -1.379300832748413, |
|
"logps": -78.7112045288086, |
|
"loss": 472.4527, |
|
"objective": 461.8623046875, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.47747528553009033, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.6304748058319092, |
|
"epoch": 0.32593292394898443, |
|
"grad_norm": 15822.122754431652, |
|
"learning_rate": 4.998892826944418e-06, |
|
"logits": -1.303352952003479, |
|
"logps": -76.60855102539062, |
|
"loss": 466.8177, |
|
"objective": 472.0991516113281, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.48390674591064453, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.5859030485153198, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 14414.401646390268, |
|
"learning_rate": 4.997321195347154e-06, |
|
"logits": -1.2155264616012573, |
|
"logps": -78.24824523925781, |
|
"loss": 478.7331, |
|
"objective": 465.13909912109375, |
|
"ranking_simple": 0.574999988079071, |
|
"regularize": 0.44416898488998413, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.5911449790000916, |
|
"epoch": 0.35427491733585265, |
|
"grad_norm": 12964.656141234129, |
|
"learning_rate": 4.9950668210706795e-06, |
|
"logits": -1.202903151512146, |
|
"logps": -79.2585220336914, |
|
"loss": 448.101, |
|
"objective": 439.24993896484375, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.4476715922355652, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.6120374202728271, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 15154.178723672056, |
|
"learning_rate": 4.992130320438411e-06, |
|
"logits": -1.1208935976028442, |
|
"logps": -78.75408172607422, |
|
"loss": 491.5896, |
|
"objective": 508.3560485839844, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.507615864276886, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.5841532349586487, |
|
"epoch": 0.3826169107227208, |
|
"grad_norm": 13801.6700914244, |
|
"learning_rate": 4.988512496260302e-06, |
|
"logits": -1.0719252824783325, |
|
"logps": -79.74116516113281, |
|
"loss": 457.2002, |
|
"objective": 470.0576477050781, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.423565536737442, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.6198513507843018, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 15864.353210882817, |
|
"learning_rate": 4.984214337613357e-06, |
|
"logits": -1.0588831901550293, |
|
"logps": -78.39877319335938, |
|
"loss": 473.3348, |
|
"objective": 488.9702453613281, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.49831974506378174, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.6141570806503296, |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 13855.012204270928, |
|
"learning_rate": 4.979237019571235e-06, |
|
"logits": -1.0990999937057495, |
|
"logps": -81.36931610107422, |
|
"loss": 472.2201, |
|
"objective": 467.56060791015625, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.4914819896221161, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.6057680249214172, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 12908.639969406, |
|
"learning_rate": 4.97358190288299e-06, |
|
"logits": -1.0687533617019653, |
|
"logps": -82.37923431396484, |
|
"loss": 462.7395, |
|
"objective": 476.6890869140625, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.5104095935821533, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42512990080302315, |
|
"eval_dpo_loss": 0.6978219151496887, |
|
"eval_logits": -1.114176630973816, |
|
"eval_logps": -85.36813354492188, |
|
"eval_loss": 552.14501953125, |
|
"eval_objective": 536.3590698242188, |
|
"eval_ranking_simple": 0.5367494821548462, |
|
"eval_regularize": 0.530274510383606, |
|
"eval_runtime": 367.6708, |
|
"eval_samples_per_second": 15.748, |
|
"eval_steps_per_second": 1.314, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.583582878112793, |
|
"epoch": 0.43930089749645723, |
|
"grad_norm": 13162.177891801954, |
|
"learning_rate": 4.967250533601059e-06, |
|
"logits": -1.109771966934204, |
|
"logps": -79.27082824707031, |
|
"loss": 471.5066, |
|
"objective": 491.6888122558594, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.5010552406311035, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.5951789021492004, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 13529.150362850014, |
|
"learning_rate": 4.9602446426585845e-06, |
|
"logits": -0.9521434307098389, |
|
"logps": -78.37739562988281, |
|
"loss": 462.6479, |
|
"objective": 443.32379150390625, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.43846720457077026, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.5964730381965637, |
|
"epoch": 0.46764289088332545, |
|
"grad_norm": 12752.905142046688, |
|
"learning_rate": 4.952566145396197e-06, |
|
"logits": -0.9862248301506042, |
|
"logps": -77.13455963134766, |
|
"loss": 478.3803, |
|
"objective": 439.1322021484375, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.4469877779483795, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.6038484573364258, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 10136.08564624282, |
|
"learning_rate": 4.944217141038379e-06, |
|
"logits": -0.9612207412719727, |
|
"logps": -76.84504699707031, |
|
"loss": 464.1854, |
|
"objective": 468.0538635253906, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.48102495074272156, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.6333703994750977, |
|
"epoch": 0.49598488427019366, |
|
"grad_norm": 12679.513797703847, |
|
"learning_rate": 4.935199912119558e-06, |
|
"logits": -0.8710211515426636, |
|
"logps": -78.75601959228516, |
|
"loss": 464.1875, |
|
"objective": 483.89111328125, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.514005720615387, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.5934690833091736, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 11848.627128456088, |
|
"learning_rate": 4.925516923860083e-06, |
|
"logits": -0.8811076879501343, |
|
"logps": -79.12806701660156, |
|
"loss": 454.5442, |
|
"objective": 488.2110900878906, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.4675270617008209, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.5724626183509827, |
|
"epoch": 0.5243268776570619, |
|
"grad_norm": 11713.664064429433, |
|
"learning_rate": 4.9151708234922605e-06, |
|
"logits": -0.877926230430603, |
|
"logps": -78.4993667602539, |
|
"loss": 458.0594, |
|
"objective": 457.6112060546875, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.4596666693687439, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.5861265659332275, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 11338.428858471121, |
|
"learning_rate": 4.904164439536626e-06, |
|
"logits": -0.9104651808738708, |
|
"logps": -78.65680694580078, |
|
"loss": 487.9273, |
|
"objective": 496.1139221191406, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.48734599351882935, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.555807888507843, |
|
"epoch": 0.5526688710439301, |
|
"grad_norm": 12189.271370638136, |
|
"learning_rate": 4.8925007810286555e-06, |
|
"logits": -0.8752073645591736, |
|
"logps": -79.23562622070312, |
|
"loss": 470.8679, |
|
"objective": 470.63421630859375, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.4960786998271942, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.5812997817993164, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 11198.141597437847, |
|
"learning_rate": 4.880183036696123e-06, |
|
"logits": -0.8242141008377075, |
|
"logps": -78.40170288085938, |
|
"loss": 445.5849, |
|
"objective": 460.94390869140625, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.44002261757850647, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.706484317779541, |
|
"eval_logits": -0.8469271063804626, |
|
"eval_logps": -81.4329833984375, |
|
"eval_loss": 561.5618896484375, |
|
"eval_objective": 550.3474731445312, |
|
"eval_ranking_simple": 0.533643901348114, |
|
"eval_regularize": 0.5524560809135437, |
|
"eval_runtime": 367.805, |
|
"eval_samples_per_second": 15.742, |
|
"eval_steps_per_second": 1.313, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.584477961063385, |
|
"epoch": 0.5810108644307983, |
|
"grad_norm": 11490.315926548616, |
|
"learning_rate": 4.867214574087338e-06, |
|
"logits": -0.6800127625465393, |
|
"logps": -77.10325622558594, |
|
"loss": 450.6568, |
|
"objective": 437.6501159667969, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.45991986989974976, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.580778181552887, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 12820.159548843532, |
|
"learning_rate": 4.853598938650487e-06, |
|
"logits": -0.7779163718223572, |
|
"logps": -75.716552734375, |
|
"loss": 486.1006, |
|
"objective": 472.6957702636719, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.47449955344200134, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.5629280805587769, |
|
"epoch": 0.6093528578176665, |
|
"grad_norm": 12199.139102166855, |
|
"learning_rate": 4.8393398527643495e-06, |
|
"logits": -0.9319173097610474, |
|
"logps": -74.01848602294922, |
|
"loss": 450.7458, |
|
"objective": 442.0367736816406, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.4502807855606079, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.551164984703064, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 12032.737647695545, |
|
"learning_rate": 4.824441214720629e-06, |
|
"logits": -1.0898358821868896, |
|
"logps": -73.6335678100586, |
|
"loss": 444.6644, |
|
"objective": 445.2054748535156, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.4597654938697815, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.583297848701477, |
|
"epoch": 0.6376948512045347, |
|
"grad_norm": 9292.574396967713, |
|
"learning_rate": 4.808907097658205e-06, |
|
"logits": -1.052019476890564, |
|
"logps": -75.00895690917969, |
|
"loss": 448.7449, |
|
"objective": 459.1669921875, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.4535444974899292, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.5654311776161194, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 10190.172018933226, |
|
"learning_rate": 4.7927417484495756e-06, |
|
"logits": -0.8342668414115906, |
|
"logps": -75.51798248291016, |
|
"loss": 436.2228, |
|
"objective": 409.9478454589844, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.4194689691066742, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.6019126772880554, |
|
"epoch": 0.6660368445914029, |
|
"grad_norm": 12000.878566078438, |
|
"learning_rate": 4.7759495865398035e-06, |
|
"logits": -0.64374178647995, |
|
"logps": -77.013916015625, |
|
"loss": 457.7021, |
|
"objective": 454.7564697265625, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.4394772946834564, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.5635860562324524, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 11997.018653451396, |
|
"learning_rate": 4.758535202738287e-06, |
|
"logits": -0.8103247284889221, |
|
"logps": -77.16266632080078, |
|
"loss": 448.4921, |
|
"objective": 470.2117004394531, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.4310372471809387, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.612980306148529, |
|
"epoch": 0.6943788379782712, |
|
"grad_norm": 12037.283885838251, |
|
"learning_rate": 4.740503357963676e-06, |
|
"logits": -0.8841701745986938, |
|
"logps": -77.16488647460938, |
|
"loss": 431.5274, |
|
"objective": 442.6828308105469, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.4170995056629181, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.5597648024559021, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 10567.387466711676, |
|
"learning_rate": 4.721858981942284e-06, |
|
"logits": -0.8271477818489075, |
|
"logps": -75.92268371582031, |
|
"loss": 445.1676, |
|
"objective": 432.1976013183594, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.39139440655708313, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7085498346717053, |
|
"eval_dpo_loss": 0.7070175409317017, |
|
"eval_logits": -1.0391225814819336, |
|
"eval_logps": -80.71741485595703, |
|
"eval_loss": 572.16943359375, |
|
"eval_objective": 563.6924438476562, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 0.5829753279685974, |
|
"eval_runtime": 368.3728, |
|
"eval_samples_per_second": 15.718, |
|
"eval_steps_per_second": 1.311, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5912286639213562, |
|
"epoch": 0.7227208313651393, |
|
"grad_norm": 10004.706428299638, |
|
"learning_rate": 4.702607171860354e-06, |
|
"logits": -1.0291627645492554, |
|
"logps": -75.54689025878906, |
|
"loss": 430.9432, |
|
"objective": 432.97802734375, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.4280008375644684, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.5518169403076172, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 10338.762017019879, |
|
"learning_rate": 4.682753190970533e-06, |
|
"logits": -0.9816469550132751, |
|
"logps": -78.38555145263672, |
|
"loss": 441.5456, |
|
"objective": 472.0696716308594, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.4596433639526367, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.5279027819633484, |
|
"epoch": 0.7510628247520076, |
|
"grad_norm": 9975.569885392504, |
|
"learning_rate": 4.6623024671529555e-06, |
|
"logits": -0.8709200024604797, |
|
"logps": -78.86503601074219, |
|
"loss": 415.3095, |
|
"objective": 424.6243896484375, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.4246416687965393, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.5645706057548523, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 10607.895202202406, |
|
"learning_rate": 4.641260591431315e-06, |
|
"logits": -0.8247819542884827, |
|
"logps": -78.76771545410156, |
|
"loss": 434.6699, |
|
"objective": 440.14715576171875, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.42400452494621277, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.587860107421875, |
|
"epoch": 0.7794048181388757, |
|
"grad_norm": 9977.49094753435, |
|
"learning_rate": 4.61963331644433e-06, |
|
"logits": -0.6680871844291687, |
|
"logps": -77.70354461669922, |
|
"loss": 438.6425, |
|
"objective": 448.8917236328125, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.46749448776245117, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.5520439147949219, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 11654.173000851612, |
|
"learning_rate": 4.597426554873037e-06, |
|
"logits": -0.7018941640853882, |
|
"logps": -78.6235122680664, |
|
"loss": 443.7228, |
|
"objective": 438.81072998046875, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.4298190176486969, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.5620520710945129, |
|
"epoch": 0.807746811525744, |
|
"grad_norm": 11208.671816444528, |
|
"learning_rate": 4.574646377824316e-06, |
|
"logits": -0.7278221845626831, |
|
"logps": -79.00438690185547, |
|
"loss": 428.4479, |
|
"objective": 430.4336242675781, |
|
"ranking_simple": 0.5375000238418579, |
|
"regularize": 0.413059800863266, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.5554956793785095, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 10809.870000518249, |
|
"learning_rate": 4.551299013171111e-06, |
|
"logits": -0.7191876769065857, |
|
"logps": -77.41136169433594, |
|
"loss": 399.2666, |
|
"objective": 398.6150817871094, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.3730964958667755, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.5479804873466492, |
|
"epoch": 0.8360888049126122, |
|
"grad_norm": 10163.479021180265, |
|
"learning_rate": 4.5273908438498e-06, |
|
"logits": -0.7566318511962891, |
|
"logps": -78.29537963867188, |
|
"loss": 420.4245, |
|
"objective": 438.2878112792969, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.43994590640068054, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.564975917339325, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 10246.9159310537, |
|
"learning_rate": 4.502928406115152e-06, |
|
"logits": -0.7081549167633057, |
|
"logps": -79.06087493896484, |
|
"loss": 413.9375, |
|
"objective": 419.59674072265625, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.3981608748435974, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.7031394839286804, |
|
"eval_logits": -0.7451677918434143, |
|
"eval_logps": -84.88603210449219, |
|
"eval_loss": 567.0264282226562, |
|
"eval_objective": 558.1202392578125, |
|
"eval_ranking_simple": 0.5398550629615784, |
|
"eval_regularize": 0.573249101638794, |
|
"eval_runtime": 374.6781, |
|
"eval_samples_per_second": 15.453, |
|
"eval_steps_per_second": 1.289, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.557949423789978, |
|
"epoch": 0.8644307982994804, |
|
"grad_norm": 10481.083205902773, |
|
"learning_rate": 4.477918387753388e-06, |
|
"logits": -0.6799180507659912, |
|
"logps": -79.46160125732422, |
|
"loss": 416.0437, |
|
"objective": 436.0469970703125, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.3994833528995514, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.537316083908081, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 10330.127581982748, |
|
"learning_rate": 4.452367626253805e-06, |
|
"logits": -0.6589821577072144, |
|
"logps": -77.94523620605469, |
|
"loss": 412.2728, |
|
"objective": 435.38397216796875, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.42988669872283936, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.5410233736038208, |
|
"epoch": 0.8927727916863486, |
|
"grad_norm": 9680.592388360066, |
|
"learning_rate": 4.426283106939474e-06, |
|
"logits": -0.5960977077484131, |
|
"logps": -77.44992065429688, |
|
"loss": 407.9562, |
|
"objective": 391.1397705078125, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.3800966143608093, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.5544202923774719, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 10390.175171602317, |
|
"learning_rate": 4.399671961057523e-06, |
|
"logits": -0.4942823052406311, |
|
"logps": -79.33159637451172, |
|
"loss": 418.3425, |
|
"objective": 413.7690124511719, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.3666258752346039, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.5637380480766296, |
|
"epoch": 0.9211147850732169, |
|
"grad_norm": 9093.12442680975, |
|
"learning_rate": 4.372541463829524e-06, |
|
"logits": -0.5750992298126221, |
|
"logps": -79.70758819580078, |
|
"loss": 400.4521, |
|
"objective": 385.9770202636719, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.3547678589820862, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.527768075466156, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 9514.787690422123, |
|
"learning_rate": 4.3448990324625244e-06, |
|
"logits": -0.5246156454086304, |
|
"logps": -78.73555755615234, |
|
"loss": 404.0078, |
|
"objective": 408.9071960449219, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.38706058263778687, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.5262110829353333, |
|
"epoch": 0.949456778460085, |
|
"grad_norm": 8108.6927584156865, |
|
"learning_rate": 4.316752224121252e-06, |
|
"logits": -0.5318282246589661, |
|
"logps": -78.84705352783203, |
|
"loss": 395.0854, |
|
"objective": 391.35888671875, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.3775762617588043, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.5850500464439392, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 9909.017294058862, |
|
"learning_rate": 4.288108733862064e-06, |
|
"logits": -0.5778205394744873, |
|
"logps": -78.09910583496094, |
|
"loss": 425.4681, |
|
"objective": 410.2987365722656, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.3491324782371521, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.5264328718185425, |
|
"epoch": 0.9777987718469532, |
|
"grad_norm": 10518.367912162912, |
|
"learning_rate": 4.2589763925291924e-06, |
|
"logits": -0.6430075168609619, |
|
"logps": -76.8382797241211, |
|
"loss": 386.2568, |
|
"objective": 371.39697265625, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.3261619508266449, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.5298411846160889, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 8822.17526005844, |
|
"learning_rate": 4.229363164613874e-06, |
|
"logits": -0.5401391983032227, |
|
"logps": -78.81169128417969, |
|
"loss": 385.7652, |
|
"objective": 396.1875305175781, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.4019112288951874, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9919697685403873, |
|
"eval_dpo_loss": 0.7081769108772278, |
|
"eval_logits": -0.6075623035430908, |
|
"eval_logps": -82.63887786865234, |
|
"eval_loss": 581.0134887695312, |
|
"eval_objective": 565.1652221679688, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 0.5905848741531372, |
|
"eval_runtime": 368.0626, |
|
"eval_samples_per_second": 15.731, |
|
"eval_steps_per_second": 1.312, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5235918760299683, |
|
"epoch": 1.0061407652338215, |
|
"grad_norm": 7985.929619871145, |
|
"learning_rate": 4.199277146076933e-06, |
|
"logits": -0.5844969153404236, |
|
"logps": -77.1658935546875, |
|
"loss": 395.3726, |
|
"objective": 383.5928955078125, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.3756142854690552, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5242210030555725, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 9633.18732259904, |
|
"learning_rate": 4.168726562135432e-06, |
|
"logits": -0.5656494498252869, |
|
"logps": -78.61763000488281, |
|
"loss": 382.5271, |
|
"objective": 390.3468322753906, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.3490845561027527, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.5211578011512756, |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 9644.11378605678, |
|
"learning_rate": 4.137719765013974e-06, |
|
"logits": -0.5837284922599792, |
|
"logps": -78.80403900146484, |
|
"loss": 393.9499, |
|
"objective": 399.89215087890625, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3770846724510193, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.5336027145385742, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 8251.35503586343, |
|
"learning_rate": 4.106265231661292e-06, |
|
"logits": -0.5281592607498169, |
|
"logps": -76.34611511230469, |
|
"loss": 364.8078, |
|
"objective": 364.66595458984375, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.337600976228714, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.5174949765205383, |
|
"epoch": 1.0628247520075578, |
|
"grad_norm": 9028.93002381266, |
|
"learning_rate": 4.074371561432731e-06, |
|
"logits": -0.5678179860115051, |
|
"logps": -76.8271255493164, |
|
"loss": 372.633, |
|
"objective": 343.89019775390625, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.30460023880004883, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5492153167724609, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 8789.25650294624, |
|
"learning_rate": 4.042047473739278e-06, |
|
"logits": -0.5127583146095276, |
|
"logps": -78.71736907958984, |
|
"loss": 373.876, |
|
"objective": 406.1351013183594, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.36652448773384094, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.5343239307403564, |
|
"epoch": 1.091166745394426, |
|
"grad_norm": 8695.398130642287, |
|
"learning_rate": 4.009301805663752e-06, |
|
"logits": -0.439236581325531, |
|
"logps": -77.64140319824219, |
|
"loss": 381.8216, |
|
"objective": 398.8446960449219, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.3279392123222351, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.5177706480026245, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 9041.130741001136, |
|
"learning_rate": 3.976143509544843e-06, |
|
"logits": -0.4288846254348755, |
|
"logps": -79.35343933105469, |
|
"loss": 357.1327, |
|
"objective": 361.1878967285156, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.3373713791370392, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.5048284530639648, |
|
"epoch": 1.1195087387812943, |
|
"grad_norm": 8902.672627868116, |
|
"learning_rate": 3.9425816505296254e-06, |
|
"logits": -0.48021775484085083, |
|
"logps": -78.194091796875, |
|
"loss": 394.8973, |
|
"objective": 412.3692932128906, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.3603326082229614, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.5264464020729065, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 8995.938641641998, |
|
"learning_rate": 3.908625404095242e-06, |
|
"logits": -0.4987303912639618, |
|
"logps": -77.11076354980469, |
|
"loss": 376.3251, |
|
"objective": 368.0682373046875, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.31286415457725525, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.7118101716041565, |
|
"eval_logits": -0.5273135900497437, |
|
"eval_logps": -81.622314453125, |
|
"eval_loss": 586.0215454101562, |
|
"eval_objective": 571.4174194335938, |
|
"eval_ranking_simple": 0.5367494821548462, |
|
"eval_regularize": 0.5996229648590088, |
|
"eval_runtime": 367.4788, |
|
"eval_samples_per_second": 15.756, |
|
"eval_steps_per_second": 1.314, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.5221129059791565, |
|
"epoch": 1.1478507321681626, |
|
"grad_norm": 8887.877038015888, |
|
"learning_rate": 3.8742840535404155e-06, |
|
"logits": -0.4772927165031433, |
|
"logps": -78.21456909179688, |
|
"loss": 374.1795, |
|
"objective": 403.9602355957031, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.35953524708747864, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.5320737361907959, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 8653.742076980236, |
|
"learning_rate": 3.839566987447492e-06, |
|
"logits": -0.5261733531951904, |
|
"logps": -77.4281005859375, |
|
"loss": 357.8752, |
|
"objective": 369.8106689453125, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.31248220801353455, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.5130491852760315, |
|
"epoch": 1.1761927255550306, |
|
"grad_norm": 8220.944462330583, |
|
"learning_rate": 3.8044836971156935e-06, |
|
"logits": -0.4498496651649475, |
|
"logps": -76.78229522705078, |
|
"loss": 367.3453, |
|
"objective": 342.2223815917969, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.32919442653656006, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.5148084163665771, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 8364.696648518799, |
|
"learning_rate": 3.7690437739662928e-06, |
|
"logits": -0.4175103008747101, |
|
"logps": -78.17424011230469, |
|
"loss": 381.6126, |
|
"objective": 368.6730041503906, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.34451645612716675, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.5163763761520386, |
|
"epoch": 1.204534718941899, |
|
"grad_norm": 8541.974606101103, |
|
"learning_rate": 3.7332569069204127e-06, |
|
"logits": -0.45798221230506897, |
|
"logps": -76.5196304321289, |
|
"loss": 370.1389, |
|
"objective": 370.2024230957031, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.3256681561470032, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.5450712442398071, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 9487.349462942979, |
|
"learning_rate": 3.697132879750174e-06, |
|
"logits": -0.48889076709747314, |
|
"logps": -78.89514923095703, |
|
"loss": 384.2139, |
|
"objective": 399.7441101074219, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.3587479293346405, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.4979787766933441, |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 9248.918944555557, |
|
"learning_rate": 3.6606815684039098e-06, |
|
"logits": -0.45076984167099, |
|
"logps": -78.12223815917969, |
|
"loss": 376.6027, |
|
"objective": 365.3465881347656, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.33380749821662903, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.5080611109733582, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 8952.956886414895, |
|
"learning_rate": 3.6239129383061764e-06, |
|
"logits": -0.5257605314254761, |
|
"logps": -77.80259704589844, |
|
"loss": 367.3115, |
|
"objective": 355.6669921875, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.3250352144241333, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.5034739375114441, |
|
"epoch": 1.2612187057156352, |
|
"grad_norm": 8595.813908218257, |
|
"learning_rate": 3.586837041633312e-06, |
|
"logits": -0.5648617148399353, |
|
"logps": -78.69916534423828, |
|
"loss": 353.7886, |
|
"objective": 345.4654235839844, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.3045599162578583, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.5136024951934814, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 8799.318521509487, |
|
"learning_rate": 3.5494640145652647e-06, |
|
"logits": -0.6497453451156616, |
|
"logps": -77.91305541992188, |
|
"loss": 348.4717, |
|
"objective": 341.05877685546875, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.2908380925655365, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2753897024090695, |
|
"eval_dpo_loss": 0.7055455446243286, |
|
"eval_logits": -0.6517468094825745, |
|
"eval_logps": -81.88983154296875, |
|
"eval_loss": 576.5939331054688, |
|
"eval_objective": 563.9976806640625, |
|
"eval_ranking_simple": 0.5372670888900757, |
|
"eval_regularize": 0.586566150188446, |
|
"eval_runtime": 372.0863, |
|
"eval_samples_per_second": 15.561, |
|
"eval_steps_per_second": 1.298, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5133717656135559, |
|
"epoch": 1.2895606991025035, |
|
"grad_norm": 8447.712740682113, |
|
"learning_rate": 3.511804074514468e-06, |
|
"logits": -0.50225830078125, |
|
"logps": -78.78018188476562, |
|
"loss": 350.7452, |
|
"objective": 353.5893249511719, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.30045947432518005, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.5129916071891785, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 8683.9771262322, |
|
"learning_rate": 3.4738675173325008e-06, |
|
"logits": -0.5019214153289795, |
|
"logps": -78.0320816040039, |
|
"loss": 358.7841, |
|
"objective": 359.1448669433594, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.28918400406837463, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.5200445055961609, |
|
"epoch": 1.3179026924893718, |
|
"grad_norm": 8708.589588634271, |
|
"learning_rate": 3.435664714495301e-06, |
|
"logits": -0.4990668296813965, |
|
"logps": -76.45315551757812, |
|
"loss": 363.9146, |
|
"objective": 359.3269348144531, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.317513108253479, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.5191565752029419, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 7892.018349995868, |
|
"learning_rate": 3.397206110267713e-06, |
|
"logits": -0.5707500576972961, |
|
"logps": -75.51515197753906, |
|
"loss": 369.7767, |
|
"objective": 370.79046630859375, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.30417945981025696, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.5160278081893921, |
|
"epoch": 1.34624468587624, |
|
"grad_norm": 8622.437524812693, |
|
"learning_rate": 3.3585022188481247e-06, |
|
"logits": -0.5167524814605713, |
|
"logps": -77.0745849609375, |
|
"loss": 347.9223, |
|
"objective": 363.10107421875, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.3366948366165161, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.5038079023361206, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 8520.652876751588, |
|
"learning_rate": 3.3195636214939943e-06, |
|
"logits": -0.5720607042312622, |
|
"logps": -76.88468170166016, |
|
"loss": 352.4882, |
|
"objective": 360.6014709472656, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.31744828820228577, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.5260137915611267, |
|
"epoch": 1.3745866792631083, |
|
"grad_norm": 8357.649136802573, |
|
"learning_rate": 3.2804009636290403e-06, |
|
"logits": -0.5760036706924438, |
|
"logps": -75.65044403076172, |
|
"loss": 352.4908, |
|
"objective": 335.92327880859375, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.27229636907577515, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.5316032767295837, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 8377.120289104301, |
|
"learning_rate": 3.2410249519328848e-06, |
|
"logits": -0.5220092535018921, |
|
"logps": -78.16001892089844, |
|
"loss": 350.6266, |
|
"objective": 356.1325988769531, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.2998295724391937, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.5069059729576111, |
|
"epoch": 1.4029286726499763, |
|
"grad_norm": 9790.99603957699, |
|
"learning_rate": 3.201446351413958e-06, |
|
"logits": -0.5315040349960327, |
|
"logps": -78.9278793334961, |
|
"loss": 355.1208, |
|
"objective": 340.3614196777344, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.2911123037338257, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.5013086199760437, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 9096.333510047558, |
|
"learning_rate": 3.1616759824664543e-06, |
|
"logits": -0.47489532828330994, |
|
"logps": -78.39351654052734, |
|
"loss": 351.4185, |
|
"objective": 349.8325500488281, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.2930639684200287, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.7128105759620667, |
|
"eval_logits": -0.5594385862350464, |
|
"eval_logps": -82.85633087158203, |
|
"eval_loss": 584.3820190429688, |
|
"eval_objective": 570.8920288085938, |
|
"eval_ranking_simple": 0.5393374562263489, |
|
"eval_regularize": 0.597196638584137, |
|
"eval_runtime": 367.2146, |
|
"eval_samples_per_second": 15.767, |
|
"eval_steps_per_second": 1.315, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.502229630947113, |
|
"epoch": 1.4312706660368446, |
|
"grad_norm": 7569.16735798095, |
|
"learning_rate": 3.121724717912138e-06, |
|
"logits": -0.4940933585166931, |
|
"logps": -79.02289581298828, |
|
"loss": 350.6974, |
|
"objective": 343.7030944824219, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.2822100818157196, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.5396325588226318, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 8970.009407427333, |
|
"learning_rate": 3.081603480027826e-06, |
|
"logits": -0.5547798275947571, |
|
"logps": -79.63969421386719, |
|
"loss": 354.8519, |
|
"objective": 349.9094543457031, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.27683576941490173, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.5251208543777466, |
|
"epoch": 1.4596126594237129, |
|
"grad_norm": 8332.379052556926, |
|
"learning_rate": 3.04132323755935e-06, |
|
"logits": -0.627532422542572, |
|
"logps": -79.98551940917969, |
|
"loss": 345.0198, |
|
"objective": 359.2087707519531, |
|
"ranking_simple": 0.5416666865348816, |
|
"regularize": 0.2865109145641327, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.5008211135864258, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 8229.897145458208, |
|
"learning_rate": 3.0008950027228035e-06, |
|
"logits": -0.5891799330711365, |
|
"logps": -76.67542266845703, |
|
"loss": 342.313, |
|
"objective": 343.9622802734375, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.27687618136405945, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.4947444498538971, |
|
"epoch": 1.487954652810581, |
|
"grad_norm": 7702.6950001891755, |
|
"learning_rate": 2.960329828193918e-06, |
|
"logits": -0.5145970582962036, |
|
"logps": -75.67664337158203, |
|
"loss": 332.768, |
|
"objective": 329.7011413574219, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.2696382403373718, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.5160828828811646, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 8386.762596966919, |
|
"learning_rate": 2.9196388040863695e-06, |
|
"logits": -0.616746187210083, |
|
"logps": -78.44214630126953, |
|
"loss": 362.1123, |
|
"objective": 358.41510009765625, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.29886895418167114, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.495453417301178, |
|
"epoch": 1.5162966461974492, |
|
"grad_norm": 7823.009103949036, |
|
"learning_rate": 2.8788330549198512e-06, |
|
"logits": -0.6062889099121094, |
|
"logps": -77.4250717163086, |
|
"loss": 331.5395, |
|
"objective": 311.8442687988281, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.24911071360111237, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5264947414398193, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 8960.269885367992, |
|
"learning_rate": 2.8379237365787426e-06, |
|
"logits": -0.5448920130729675, |
|
"logps": -77.27252960205078, |
|
"loss": 334.277, |
|
"objective": 340.4452209472656, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.2655259668827057, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.5327333211898804, |
|
"epoch": 1.5446386395843175, |
|
"grad_norm": 8003.393273489204, |
|
"learning_rate": 2.7969220332622004e-06, |
|
"logits": -0.6398530602455139, |
|
"logps": -76.85224151611328, |
|
"loss": 331.0988, |
|
"objective": 332.23486328125, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.2610936164855957, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.5121258497238159, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 7510.163531162746, |
|
"learning_rate": 2.7558391544265127e-06, |
|
"logits": -0.6813774704933167, |
|
"logps": -76.5182876586914, |
|
"loss": 326.458, |
|
"objective": 333.58294677734375, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.27006784081459045, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5588096362777515, |
|
"eval_dpo_loss": 0.7086328268051147, |
|
"eval_logits": -0.6993709802627563, |
|
"eval_logps": -80.56141662597656, |
|
"eval_loss": 578.3502807617188, |
|
"eval_objective": 565.96826171875, |
|
"eval_ranking_simple": 0.5367494821548462, |
|
"eval_regularize": 0.5877013206481934, |
|
"eval_runtime": 367.856, |
|
"eval_samples_per_second": 15.74, |
|
"eval_steps_per_second": 1.313, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5160035490989685, |
|
"epoch": 1.5729806329711855, |
|
"grad_norm": 8802.921874841602, |
|
"learning_rate": 2.714686331720543e-06, |
|
"logits": -0.5305084586143494, |
|
"logps": -76.84326934814453, |
|
"loss": 342.895, |
|
"objective": 352.5166320800781, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.26788362860679626, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5147711634635925, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 7348.156155946241, |
|
"learning_rate": 2.6734748159151104e-06, |
|
"logits": -0.5309932231903076, |
|
"logps": -76.01145935058594, |
|
"loss": 327.5288, |
|
"objective": 325.1703796386719, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.25214916467666626, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.5153753161430359, |
|
"epoch": 1.601322626358054, |
|
"grad_norm": 8906.64379732216, |
|
"learning_rate": 2.632215873827142e-06, |
|
"logits": -0.4652445912361145, |
|
"logps": -76.68761444091797, |
|
"loss": 354.5548, |
|
"objective": 364.25689697265625, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.28750428557395935, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.519721508026123, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 8653.666075373956, |
|
"learning_rate": 2.5909207852394363e-06, |
|
"logits": -0.5437235236167908, |
|
"logps": -77.414794921875, |
|
"loss": 333.0782, |
|
"objective": 335.1112365722656, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.25310030579566956, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.5159035325050354, |
|
"epoch": 1.629664619744922, |
|
"grad_norm": 9098.50496014823, |
|
"learning_rate": 2.5496008398168844e-06, |
|
"logits": -0.5154822468757629, |
|
"logps": -74.86051177978516, |
|
"loss": 338.3886, |
|
"objective": 360.5445861816406, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.293546199798584, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.5085076093673706, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 7622.784591036375, |
|
"learning_rate": 2.508267334019988e-06, |
|
"logits": -0.5285104513168335, |
|
"logps": -75.09459686279297, |
|
"loss": 326.7957, |
|
"objective": 322.0632019042969, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.23723416030406952, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.5216612219810486, |
|
"epoch": 1.6580066131317903, |
|
"grad_norm": 8435.901879905807, |
|
"learning_rate": 2.46693156801652e-06, |
|
"logits": -0.43548285961151123, |
|
"logps": -74.49349212646484, |
|
"loss": 324.9342, |
|
"objective": 314.76617431640625, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.23195335268974304, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.5121405720710754, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 8829.097116710429, |
|
"learning_rate": 2.4256048425921693e-06, |
|
"logits": -0.4449107050895691, |
|
"logps": -75.21448516845703, |
|
"loss": 332.8984, |
|
"objective": 336.7486572265625, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.25163254141807556, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.5095264911651611, |
|
"epoch": 1.6863486065186586, |
|
"grad_norm": 8981.762264761135, |
|
"learning_rate": 2.384298456061023e-06, |
|
"logits": -0.4447081387042999, |
|
"logps": -75.95592498779297, |
|
"loss": 329.941, |
|
"objective": 336.85479736328125, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.263884961605072, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.5105345845222473, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 8799.189125731713, |
|
"learning_rate": 2.3430237011767166e-06, |
|
"logits": -0.5217716693878174, |
|
"logps": -76.75984191894531, |
|
"loss": 329.0151, |
|
"objective": 342.8132019042969, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.2702232301235199, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.7084978222846985, |
|
"eval_logits": -0.593561053276062, |
|
"eval_logps": -80.32785034179688, |
|
"eval_loss": 578.38671875, |
|
"eval_objective": 566.0594482421875, |
|
"eval_ranking_simple": 0.5388198494911194, |
|
"eval_regularize": 0.5912774801254272, |
|
"eval_runtime": 369.5452, |
|
"eval_samples_per_second": 15.668, |
|
"eval_steps_per_second": 1.307, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.5166282653808594, |
|
"epoch": 1.7146905999055266, |
|
"grad_norm": 8773.738115986773, |
|
"learning_rate": 2.30179186204511e-06, |
|
"logits": -0.4922519624233246, |
|
"logps": -77.39384460449219, |
|
"loss": 324.8781, |
|
"objective": 324.775390625, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.23106712102890015, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.5313800573348999, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 9468.956357202407, |
|
"learning_rate": 2.2606142110393248e-06, |
|
"logits": -0.5464334487915039, |
|
"logps": -75.34829711914062, |
|
"loss": 329.8079, |
|
"objective": 328.7645568847656, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.2314998209476471, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.5274057984352112, |
|
"epoch": 1.743032593292395, |
|
"grad_norm": 8788.596472210756, |
|
"learning_rate": 2.2195020057179897e-06, |
|
"logits": -0.5443993210792542, |
|
"logps": -75.97472381591797, |
|
"loss": 328.8101, |
|
"objective": 338.30584716796875, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.255048006772995, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.5214657187461853, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 8592.256473559864, |
|
"learning_rate": 2.1784664857475356e-06, |
|
"logits": -0.5270652174949646, |
|
"logps": -76.55162811279297, |
|
"loss": 325.7626, |
|
"objective": 319.66046142578125, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.23240961134433746, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.5136802196502686, |
|
"epoch": 1.7713745866792632, |
|
"grad_norm": 7718.82478571958, |
|
"learning_rate": 2.1375188698293855e-06, |
|
"logits": -0.462724506855011, |
|
"logps": -76.12433624267578, |
|
"loss": 330.2958, |
|
"objective": 325.9873046875, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.22590765357017517, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.536301851272583, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 8761.966130498178, |
|
"learning_rate": 2.096670352632873e-06, |
|
"logits": -0.5007703304290771, |
|
"logps": -77.76058197021484, |
|
"loss": 326.3625, |
|
"objective": 339.7125549316406, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.23891252279281616, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.517907977104187, |
|
"epoch": 1.7997165800661312, |
|
"grad_norm": 7178.908644562367, |
|
"learning_rate": 2.0559321017347286e-06, |
|
"logits": -0.585433840751648, |
|
"logps": -76.77262878417969, |
|
"loss": 319.7427, |
|
"objective": 316.2701110839844, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.218379944562912, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.5069996118545532, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 7586.408561007724, |
|
"learning_rate": 2.01531525456598e-06, |
|
"logits": -0.5689796805381775, |
|
"logps": -77.61341094970703, |
|
"loss": 320.7854, |
|
"objective": 324.8812255859375, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.2512456178665161, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.5200368762016296, |
|
"epoch": 1.8280585734529995, |
|
"grad_norm": 8686.608118182792, |
|
"learning_rate": 1.974830915367086e-06, |
|
"logits": -0.5587595701217651, |
|
"logps": -78.18843078613281, |
|
"loss": 330.6056, |
|
"objective": 326.5403747558594, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.24110235273838043, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5109516382217407, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 9365.891667609872, |
|
"learning_rate": 1.93449015215215e-06, |
|
"logits": -0.5691719055175781, |
|
"logps": -78.31002807617188, |
|
"loss": 333.5158, |
|
"objective": 333.96240234375, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.23523901402950287, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.8422295701464337, |
|
"eval_dpo_loss": 0.7084385752677917, |
|
"eval_logits": -0.596887469291687, |
|
"eval_logps": -81.02249908447266, |
|
"eval_loss": 577.92919921875, |
|
"eval_objective": 565.5914916992188, |
|
"eval_ranking_simple": 0.5393374562263489, |
|
"eval_regularize": 0.5890585780143738, |
|
"eval_runtime": 370.9761, |
|
"eval_samples_per_second": 15.607, |
|
"eval_steps_per_second": 1.302, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5157236456871033, |
|
"epoch": 1.8564005668398678, |
|
"grad_norm": 8570.37706164772, |
|
"learning_rate": 1.8943039936830347e-06, |
|
"logits": -0.5198069214820862, |
|
"logps": -75.62894439697266, |
|
"loss": 321.6153, |
|
"objective": 320.3071594238281, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.2329235076904297, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.521937906742096, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 8345.743884997746, |
|
"learning_rate": 1.8542834264542091e-06, |
|
"logits": -0.5126068592071533, |
|
"logps": -78.36030578613281, |
|
"loss": 328.7253, |
|
"objective": 325.4534606933594, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.2483067512512207, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.515285313129425, |
|
"epoch": 1.8847425602267358, |
|
"grad_norm": 7929.985369188011, |
|
"learning_rate": 1.814439391689151e-06, |
|
"logits": -0.5389847159385681, |
|
"logps": -76.56269073486328, |
|
"loss": 319.5206, |
|
"objective": 331.35791015625, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.23167690634727478, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.5117120146751404, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 8611.003311273333, |
|
"learning_rate": 1.7747827823491253e-06, |
|
"logits": -0.4842732548713684, |
|
"logps": -75.30670928955078, |
|
"loss": 310.9476, |
|
"objective": 312.0437316894531, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.23464351892471313, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.5363326668739319, |
|
"epoch": 1.9130845536136043, |
|
"grad_norm": 8390.641065212347, |
|
"learning_rate": 1.7353244401551566e-06, |
|
"logits": -0.4712333679199219, |
|
"logps": -76.18867492675781, |
|
"loss": 317.9931, |
|
"objective": 336.70703125, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.23009681701660156, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.5199182033538818, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 8581.710628204828, |
|
"learning_rate": 1.6960751526240122e-06, |
|
"logits": -0.49357444047927856, |
|
"logps": -78.31690216064453, |
|
"loss": 315.1092, |
|
"objective": 305.8600769042969, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.20979805290699005, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.5106812119483948, |
|
"epoch": 1.9414265470004723, |
|
"grad_norm": 8197.15415072432, |
|
"learning_rate": 1.6570456501189996e-06, |
|
"logits": -0.5017139911651611, |
|
"logps": -76.01095581054688, |
|
"loss": 306.0857, |
|
"objective": 298.7364501953125, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.20531941950321198, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.5228912830352783, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 8227.185493128562, |
|
"learning_rate": 1.6182466029163974e-06, |
|
"logits": -0.503932535648346, |
|
"logps": -76.89771270751953, |
|
"loss": 312.1701, |
|
"objective": 309.2964782714844, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.1995265930891037, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.5291420817375183, |
|
"epoch": 1.9697685403873406, |
|
"grad_norm": 8124.705980309312, |
|
"learning_rate": 1.5796886182883053e-06, |
|
"logits": -0.47076740860939026, |
|
"logps": -77.54142761230469, |
|
"loss": 303.4818, |
|
"objective": 305.0574951171875, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.16832788288593292, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5130675435066223, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 8798.438767811667, |
|
"learning_rate": 1.541382237602721e-06, |
|
"logits": -0.4266551434993744, |
|
"logps": -77.73848724365234, |
|
"loss": 316.2014, |
|
"objective": 308.5932312011719, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.20295077562332153, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.709835410118103, |
|
"eval_logits": -0.5956435203552246, |
|
"eval_logps": -80.5416488647461, |
|
"eval_loss": 577.6038208007812, |
|
"eval_objective": 564.6389770507812, |
|
"eval_ranking_simple": 0.5408902764320374, |
|
"eval_regularize": 0.5856665968894958, |
|
"eval_runtime": 367.2185, |
|
"eval_samples_per_second": 15.767, |
|
"eval_steps_per_second": 1.315, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5043766498565674, |
|
"epoch": 1.9981105337742089, |
|
"grad_norm": 8813.527233337918, |
|
"learning_rate": 1.5033379334416376e-06, |
|
"logits": -0.415615975856781, |
|
"logps": -76.69499969482422, |
|
"loss": 307.2797, |
|
"objective": 299.0835266113281, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.1889916956424713, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.5075303316116333, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 8152.029201211714, |
|
"learning_rate": 1.465566106737942e-06, |
|
"logits": -0.5361739993095398, |
|
"logps": -76.06444549560547, |
|
"loss": 296.988, |
|
"objective": 296.4339904785156, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.16836042702198029, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.499095618724823, |
|
"epoch": 2.026452527161077, |
|
"grad_norm": 7474.705902903634, |
|
"learning_rate": 1.4280770839319073e-06, |
|
"logits": -0.5198894739151001, |
|
"logps": -75.47317504882812, |
|
"loss": 290.1166, |
|
"objective": 297.8743896484375, |
|
"ranking_simple": 0.7041666507720947, |
|
"regularize": 0.2067592293024063, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.49675101041793823, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 7687.909209756474, |
|
"learning_rate": 1.3908811141480408e-06, |
|
"logits": -0.48511701822280884, |
|
"logps": -76.5396957397461, |
|
"loss": 293.1388, |
|
"objective": 296.0254821777344, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.18073533475399017, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.5085265636444092, |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 7150.825567237203, |
|
"learning_rate": 1.353988366393083e-06, |
|
"logits": -0.4982639253139496, |
|
"logps": -79.44617462158203, |
|
"loss": 288.661, |
|
"objective": 303.2115173339844, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.1895003318786621, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.5071407556533813, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 7729.725354104077, |
|
"learning_rate": 1.3174089267758983e-06, |
|
"logits": -0.5406936407089233, |
|
"logps": -77.1041488647461, |
|
"loss": 295.914, |
|
"objective": 279.0093078613281, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1544083058834076, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.508653998374939, |
|
"epoch": 2.0831365139348135, |
|
"grad_norm": 7709.562610036098, |
|
"learning_rate": 1.2811527957500344e-06, |
|
"logits": -0.5032610297203064, |
|
"logps": -76.36556243896484, |
|
"loss": 281.1705, |
|
"objective": 274.6349792480469, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.16756394505500793, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.5007545351982117, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 7780.159502686646, |
|
"learning_rate": 1.245229885379699e-06, |
|
"logits": -0.5718483328819275, |
|
"logps": -76.52877807617188, |
|
"loss": 287.3269, |
|
"objective": 281.77728271484375, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.15750272572040558, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.49083974957466125, |
|
"epoch": 2.1114785073216815, |
|
"grad_norm": 7559.0234640974395, |
|
"learning_rate": 1.2096500166298992e-06, |
|
"logits": -0.5142738223075867, |
|
"logps": -77.08470916748047, |
|
"loss": 288.8493, |
|
"objective": 278.6231384277344, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.1696869283914566, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.510530412197113, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 8108.616262187852, |
|
"learning_rate": 1.1744229166814889e-06, |
|
"logits": -0.5391489267349243, |
|
"logps": -75.9011459350586, |
|
"loss": 295.2996, |
|
"objective": 287.6244201660156, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.14458681643009186, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.1256495040151155, |
|
"eval_dpo_loss": 0.7108220458030701, |
|
"eval_logits": -0.5878574252128601, |
|
"eval_logps": -81.0738525390625, |
|
"eval_loss": 579.5015258789062, |
|
"eval_objective": 567.8404541015625, |
|
"eval_ranking_simple": 0.5393374562263489, |
|
"eval_regularize": 0.592528223991394, |
|
"eval_runtime": 367.7716, |
|
"eval_samples_per_second": 15.743, |
|
"eval_steps_per_second": 1.313, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5009695291519165, |
|
"epoch": 2.13982050070855, |
|
"grad_norm": 8018.931804650577, |
|
"learning_rate": 1.1395582162718524e-06, |
|
"logits": -0.5374471545219421, |
|
"logps": -78.51016998291016, |
|
"loss": 291.8306, |
|
"objective": 290.9220275878906, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.1683264821767807, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.5287482738494873, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 7462.123206327768, |
|
"learning_rate": 1.1050654470619602e-06, |
|
"logits": -0.46891796588897705, |
|
"logps": -75.76030731201172, |
|
"loss": 290.7075, |
|
"objective": 282.1697082519531, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.15730994939804077, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.5191124081611633, |
|
"epoch": 2.168162494095418, |
|
"grad_norm": 8366.940049042583, |
|
"learning_rate": 1.0709540390305061e-06, |
|
"logits": -0.48605969548225403, |
|
"logps": -76.97061920166016, |
|
"loss": 283.513, |
|
"objective": 276.4500732421875, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.14232973754405975, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.5144416093826294, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 8218.822763503342, |
|
"learning_rate": 1.0372333178958462e-06, |
|
"logits": -0.4805113971233368, |
|
"logps": -77.28369140625, |
|
"loss": 295.4732, |
|
"objective": 300.4273376464844, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.17838290333747864, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.5137962698936462, |
|
"epoch": 2.196504487482286, |
|
"grad_norm": 6441.500828763883, |
|
"learning_rate": 1.0039125025664392e-06, |
|
"logits": -0.5039299130439758, |
|
"logps": -77.55305480957031, |
|
"loss": 277.693, |
|
"objective": 280.0492248535156, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.14146603643894196, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.5056738257408142, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 8834.102544429094, |
|
"learning_rate": 9.710007026204896e-07, |
|
"logits": -0.43781086802482605, |
|
"logps": -77.69278717041016, |
|
"loss": 288.3063, |
|
"objective": 297.6618347167969, |
|
"ranking_simple": 0.5249999761581421, |
|
"regularize": 0.16135714948177338, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.4925435483455658, |
|
"epoch": 2.2248464808691546, |
|
"grad_norm": 7651.241881609664, |
|
"learning_rate": 9.385069158154805e-07, |
|
"logits": -0.4533029794692993, |
|
"logps": -77.70331573486328, |
|
"loss": 280.9115, |
|
"objective": 270.46942138671875, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.14943251013755798, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.49593406915664673, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 7219.442805293083, |
|
"learning_rate": 9.064400256282757e-07, |
|
"logits": -0.44717687368392944, |
|
"logps": -78.27497863769531, |
|
"loss": 282.4128, |
|
"objective": 267.8057861328125, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.15546827018260956, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.505749523639679, |
|
"epoch": 2.2531884742560226, |
|
"grad_norm": 7763.536847557715, |
|
"learning_rate": 8.74808798826467e-07, |
|
"logits": -0.4969979226589203, |
|
"logps": -79.33271789550781, |
|
"loss": 281.3165, |
|
"objective": 274.2493896484375, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.1496947556734085, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.48660808801651, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 7932.089944092327, |
|
"learning_rate": 8.436218830716259e-07, |
|
"logits": -0.5253292322158813, |
|
"logps": -78.79373931884766, |
|
"loss": 290.0791, |
|
"objective": 280.91900634765625, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.16623292863368988, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.7088403105735779, |
|
"eval_logits": -0.5885158777236938, |
|
"eval_logps": -81.68885803222656, |
|
"eval_loss": 576.8207397460938, |
|
"eval_objective": 564.8282470703125, |
|
"eval_ranking_simple": 0.5377846956253052, |
|
"eval_regularize": 0.5875974893569946, |
|
"eval_runtime": 367.7198, |
|
"eval_samples_per_second": 15.746, |
|
"eval_steps_per_second": 1.314, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5162143707275391, |
|
"epoch": 2.2815304676428907, |
|
"grad_norm": 7214.841128896386, |
|
"learning_rate": 8.1288780455512e-07, |
|
"logits": -0.5276073217391968, |
|
"logps": -77.3137435913086, |
|
"loss": 278.3046, |
|
"objective": 292.67730712890625, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.16379231214523315, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5017233490943909, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 6869.828749708057, |
|
"learning_rate": 7.826149656671386e-07, |
|
"logits": -0.5925108790397644, |
|
"logps": -76.71666717529297, |
|
"loss": 282.4796, |
|
"objective": 281.0669860839844, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.1503111720085144, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.4981227219104767, |
|
"epoch": 2.309872461029759, |
|
"grad_norm": 7651.540848559703, |
|
"learning_rate": 7.528116426995605e-07, |
|
"logits": -0.5629077553749084, |
|
"logps": -78.19300079345703, |
|
"loss": 273.4494, |
|
"objective": 278.7046203613281, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.13803134858608246, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.5106547474861145, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 8303.88053874128, |
|
"learning_rate": 7.234859835833022e-07, |
|
"logits": -0.49964413046836853, |
|
"logps": -77.4349136352539, |
|
"loss": 281.5386, |
|
"objective": 289.7933349609375, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.14034216105937958, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.5033460259437561, |
|
"epoch": 2.3382144544166272, |
|
"grad_norm": 7465.14408583757, |
|
"learning_rate": 6.94646005660749e-07, |
|
"logits": -0.5037187337875366, |
|
"logps": -76.92729949951172, |
|
"loss": 278.1418, |
|
"objective": 275.7555847167969, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.14195986092090607, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.5009591579437256, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 7370.912864973515, |
|
"learning_rate": 6.662995934939007e-07, |
|
"logits": -0.5249782800674438, |
|
"logps": -78.88058471679688, |
|
"loss": 277.6341, |
|
"objective": 290.5874328613281, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.15550047159194946, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.506280243396759, |
|
"epoch": 2.3665564478034957, |
|
"grad_norm": 7154.510003938923, |
|
"learning_rate": 6.384544967088063e-07, |
|
"logits": -0.5261546969413757, |
|
"logps": -78.27130889892578, |
|
"loss": 283.8206, |
|
"objective": 284.1214294433594, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.15022988617420197, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.5248311758041382, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 7561.035287310657, |
|
"learning_rate": 6.111183278768956e-07, |
|
"logits": -0.47096776962280273, |
|
"logps": -78.47908020019531, |
|
"loss": 281.1299, |
|
"objective": 287.78863525390625, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.13501495122909546, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.5237764120101929, |
|
"epoch": 2.3948984411903638, |
|
"grad_norm": 9019.208502842846, |
|
"learning_rate": 5.842985604337769e-07, |
|
"logits": -0.524657666683197, |
|
"logps": -79.37838745117188, |
|
"loss": 288.7943, |
|
"objective": 290.1376647949219, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.13873761892318726, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.5015512704849243, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 7214.405283580069, |
|
"learning_rate": 5.580025266360764e-07, |
|
"logits": -0.5334345102310181, |
|
"logps": -77.42970275878906, |
|
"loss": 277.1292, |
|
"objective": 274.3782043457031, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.13637620210647583, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.409069437883798, |
|
"eval_dpo_loss": 0.7109217643737793, |
|
"eval_logits": -0.5770819187164307, |
|
"eval_logps": -81.54353332519531, |
|
"eval_loss": 579.0093994140625, |
|
"eval_objective": 567.1205444335938, |
|
"eval_ranking_simple": 0.5383023023605347, |
|
"eval_regularize": 0.5911442041397095, |
|
"eval_runtime": 370.8205, |
|
"eval_samples_per_second": 15.614, |
|
"eval_steps_per_second": 1.303, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.5071312785148621, |
|
"epoch": 2.423240434577232, |
|
"grad_norm": 7272.234405221214, |
|
"learning_rate": 5.322374155568688e-07, |
|
"logits": -0.5134973526000977, |
|
"logps": -76.6072769165039, |
|
"loss": 278.8906, |
|
"objective": 276.31671142578125, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.14478901028633118, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5126808881759644, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 8279.020308548867, |
|
"learning_rate": 5.070102711202606e-07, |
|
"logits": -0.4904005825519562, |
|
"logps": -78.11277770996094, |
|
"loss": 272.6675, |
|
"objective": 259.9554748535156, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.12409182637929916, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.5031525492668152, |
|
"epoch": 2.4515824279641003, |
|
"grad_norm": 7810.5501789039035, |
|
"learning_rate": 4.823279901756498e-07, |
|
"logits": -0.5084951519966125, |
|
"logps": -77.30481719970703, |
|
"loss": 278.5447, |
|
"objective": 284.18084716796875, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.1416517198085785, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.5099405646324158, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 7054.3955623341435, |
|
"learning_rate": 4.581973206121948e-07, |
|
"logits": -0.522720456123352, |
|
"logps": -78.04560852050781, |
|
"loss": 278.9006, |
|
"objective": 274.6994934082031, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.13992194831371307, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.4974448084831238, |
|
"epoch": 2.4799244213509684, |
|
"grad_norm": 7158.801947079291, |
|
"learning_rate": 4.3462485951401126e-07, |
|
"logits": -0.481945663690567, |
|
"logps": -77.63534545898438, |
|
"loss": 265.5646, |
|
"objective": 269.0285949707031, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.13618159294128418, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.516840398311615, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 7224.52824602365, |
|
"learning_rate": 4.116170513565942e-07, |
|
"logits": -0.42012926936149597, |
|
"logps": -77.26931762695312, |
|
"loss": 277.9572, |
|
"objective": 277.60955810546875, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.12541402876377106, |
|
"step": 880 |
|
}, |
|
{ |
|
"dpo_loss": 0.5121944546699524, |
|
"epoch": 2.5082664147378364, |
|
"grad_norm": 7670.321121071118, |
|
"learning_rate": 3.891801862449629e-07, |
|
"logits": -0.5377725958824158, |
|
"logps": -76.21176147460938, |
|
"loss": 273.7286, |
|
"objective": 278.1051940917969, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.13135148584842682, |
|
"step": 885 |
|
}, |
|
{ |
|
"dpo_loss": 0.5038707852363586, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 7464.3594622623605, |
|
"learning_rate": 3.6732039819400686e-07, |
|
"logits": -0.5120099782943726, |
|
"logps": -75.08890533447266, |
|
"loss": 270.7643, |
|
"objective": 259.2137145996094, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.1371425837278366, |
|
"step": 890 |
|
}, |
|
{ |
|
"dpo_loss": 0.5104149580001831, |
|
"epoch": 2.536608408124705, |
|
"grad_norm": 7776.17053954941, |
|
"learning_rate": 3.46043663451511e-07, |
|
"logits": -0.5063762068748474, |
|
"logps": -77.50283813476562, |
|
"loss": 274.0527, |
|
"objective": 279.8547668457031, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.13040438294410706, |
|
"step": 895 |
|
}, |
|
{ |
|
"dpo_loss": 0.5273467302322388, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 7627.369911355359, |
|
"learning_rate": 3.253557988643072e-07, |
|
"logits": -0.49025458097457886, |
|
"logps": -76.16547393798828, |
|
"loss": 271.9766, |
|
"objective": 274.15386962890625, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.13161370158195496, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.7098731994628906, |
|
"eval_logits": -0.5707982182502747, |
|
"eval_logps": -81.16320037841797, |
|
"eval_loss": 577.3417358398438, |
|
"eval_objective": 565.7183837890625, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 0.5880821347236633, |
|
"eval_runtime": 368.1908, |
|
"eval_samples_per_second": 15.726, |
|
"eval_steps_per_second": 1.312, |
|
"step": 900 |
|
}, |
|
{ |
|
"dpo_loss": 0.512609601020813, |
|
"epoch": 2.564950401511573, |
|
"grad_norm": 6804.885302407535, |
|
"learning_rate": 3.052624602880064e-07, |
|
"logits": -0.5115708112716675, |
|
"logps": -75.81733703613281, |
|
"loss": 265.9078, |
|
"objective": 269.9800109863281, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.14800624549388885, |
|
"step": 905 |
|
}, |
|
{ |
|
"dpo_loss": 0.4937320947647095, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 7198.203187575293, |
|
"learning_rate": 2.8576914104074425e-07, |
|
"logits": -0.48827874660491943, |
|
"logps": -79.18850708007812, |
|
"loss": 265.7443, |
|
"objective": 273.89166259765625, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.14111953973770142, |
|
"step": 910 |
|
}, |
|
{ |
|
"dpo_loss": 0.5084269642829895, |
|
"epoch": 2.593292394898441, |
|
"grad_norm": 6942.193456170007, |
|
"learning_rate": 2.6688117040136463e-07, |
|
"logits": -0.5942879915237427, |
|
"logps": -78.21766662597656, |
|
"loss": 278.3137, |
|
"objective": 263.6408386230469, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.11606747657060623, |
|
"step": 915 |
|
}, |
|
{ |
|
"dpo_loss": 0.498431533575058, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 6451.638108152793, |
|
"learning_rate": 2.486037121524448e-07, |
|
"logits": -0.46755722165107727, |
|
"logps": -77.2091293334961, |
|
"loss": 270.5728, |
|
"objective": 287.58538818359375, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.14371080696582794, |
|
"step": 920 |
|
}, |
|
{ |
|
"dpo_loss": 0.5088561177253723, |
|
"epoch": 2.6216343882853095, |
|
"grad_norm": 8257.41304629772, |
|
"learning_rate": 2.3094176316856982e-07, |
|
"logits": -0.4761093258857727, |
|
"logps": -76.82450866699219, |
|
"loss": 270.0178, |
|
"objective": 261.9400634765625, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.11346574872732162, |
|
"step": 925 |
|
}, |
|
{ |
|
"dpo_loss": 0.5108747482299805, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 7715.040672522825, |
|
"learning_rate": 2.13900152050239e-07, |
|
"logits": -0.4767756164073944, |
|
"logps": -77.13011932373047, |
|
"loss": 273.9086, |
|
"objective": 285.1964111328125, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.13920140266418457, |
|
"step": 930 |
|
}, |
|
{ |
|
"dpo_loss": 0.5189302563667297, |
|
"epoch": 2.6499763816721775, |
|
"grad_norm": 8548.789471220947, |
|
"learning_rate": 1.9748353780377234e-07, |
|
"logits": -0.45983853936195374, |
|
"logps": -76.03173828125, |
|
"loss": 276.8373, |
|
"objective": 269.6080322265625, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.12301207333803177, |
|
"step": 935 |
|
}, |
|
{ |
|
"dpo_loss": 0.5017234683036804, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 8162.699837992091, |
|
"learning_rate": 1.8169640856758652e-07, |
|
"logits": -0.504283607006073, |
|
"logps": -78.90401458740234, |
|
"loss": 281.2083, |
|
"objective": 285.80694580078125, |
|
"ranking_simple": 0.5958333611488342, |
|
"regularize": 0.1213822215795517, |
|
"step": 940 |
|
}, |
|
{ |
|
"dpo_loss": 0.4903925657272339, |
|
"epoch": 2.678318375059046, |
|
"grad_norm": 7208.828785964443, |
|
"learning_rate": 1.6654308038518057e-07, |
|
"logits": -0.5296005010604858, |
|
"logps": -77.27458953857422, |
|
"loss": 265.3151, |
|
"objective": 269.78106689453125, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.12937407195568085, |
|
"step": 945 |
|
}, |
|
{ |
|
"dpo_loss": 0.5223451256752014, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 7752.613058246486, |
|
"learning_rate": 1.5202769602517514e-07, |
|
"logits": -0.45917627215385437, |
|
"logps": -77.59455871582031, |
|
"loss": 273.4982, |
|
"objective": 260.7156982421875, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.10433920472860336, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.69248937175248, |
|
"eval_dpo_loss": 0.7103093862533569, |
|
"eval_logits": -0.5680380463600159, |
|
"eval_logps": -81.19538879394531, |
|
"eval_loss": 578.9320678710938, |
|
"eval_objective": 567.1773071289062, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 0.5910264849662781, |
|
"eval_runtime": 367.6922, |
|
"eval_samples_per_second": 15.747, |
|
"eval_steps_per_second": 1.314, |
|
"step": 950 |
|
}, |
|
{ |
|
"dpo_loss": 0.5060604810714722, |
|
"epoch": 2.706660368445914, |
|
"grad_norm": 6751.002791925838, |
|
"learning_rate": 1.381542238487188e-07, |
|
"logits": -0.5122019648551941, |
|
"logps": -77.12061309814453, |
|
"loss": 264.4783, |
|
"objective": 250.47158813476562, |
|
"ranking_simple": 0.5916666388511658, |
|
"regularize": 0.12410003691911697, |
|
"step": 955 |
|
}, |
|
{ |
|
"dpo_loss": 0.5180147886276245, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 8054.959159664104, |
|
"learning_rate": 1.2492645672457838e-07, |
|
"logits": -0.48304110765457153, |
|
"logps": -77.1063003540039, |
|
"loss": 279.6324, |
|
"objective": 290.03631591796875, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.13714757561683655, |
|
"step": 960 |
|
}, |
|
{ |
|
"dpo_loss": 0.49898579716682434, |
|
"epoch": 2.735002361832782, |
|
"grad_norm": 7601.482545856324, |
|
"learning_rate": 1.1234801099220787e-07, |
|
"logits": -0.5196807980537415, |
|
"logps": -77.11388397216797, |
|
"loss": 269.8563, |
|
"objective": 259.89813232421875, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.13252395391464233, |
|
"step": 965 |
|
}, |
|
{ |
|
"dpo_loss": 0.51671302318573, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 7233.095787852224, |
|
"learning_rate": 1.004223254730749e-07, |
|
"logits": -0.5174197554588318, |
|
"logps": -78.05948638916016, |
|
"loss": 262.7624, |
|
"objective": 268.42169189453125, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.13439247012138367, |
|
"step": 970 |
|
}, |
|
{ |
|
"dpo_loss": 0.4989195764064789, |
|
"epoch": 2.7633443552196506, |
|
"grad_norm": 7653.138309833151, |
|
"learning_rate": 8.915266053052374e-08, |
|
"logits": -0.4456302523612976, |
|
"logps": -76.5742416381836, |
|
"loss": 265.2271, |
|
"objective": 260.4195556640625, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.12436621636152267, |
|
"step": 975 |
|
}, |
|
{ |
|
"dpo_loss": 0.5052908658981323, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 7354.157364452891, |
|
"learning_rate": 7.854209717842231e-08, |
|
"logits": -0.5343514084815979, |
|
"logps": -76.55020141601562, |
|
"loss": 273.2646, |
|
"objective": 284.3968200683594, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.1349634826183319, |
|
"step": 980 |
|
}, |
|
{ |
|
"dpo_loss": 0.5007596015930176, |
|
"epoch": 2.7916863486065187, |
|
"grad_norm": 6932.45863115929, |
|
"learning_rate": 6.859353623884569e-08, |
|
"logits": -0.44778621196746826, |
|
"logps": -74.72814178466797, |
|
"loss": 272.9477, |
|
"objective": 261.4037170410156, |
|
"ranking_simple": 0.5874999761581421, |
|
"regularize": 0.12619872391223907, |
|
"step": 985 |
|
}, |
|
{ |
|
"dpo_loss": 0.50762540102005, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 7347.273398591806, |
|
"learning_rate": 5.930969754901844e-08, |
|
"logits": -0.4778214693069458, |
|
"logps": -78.38259887695312, |
|
"loss": 260.7251, |
|
"objective": 262.8490905761719, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.11383456736803055, |
|
"step": 990 |
|
}, |
|
{ |
|
"dpo_loss": 0.5177646279335022, |
|
"epoch": 2.820028341993387, |
|
"grad_norm": 8414.372934711379, |
|
"learning_rate": 5.069311921774039e-08, |
|
"logits": -0.5479720830917358, |
|
"logps": -76.77764129638672, |
|
"loss": 273.3689, |
|
"objective": 282.17889404296875, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.13415595889091492, |
|
"step": 995 |
|
}, |
|
{ |
|
"dpo_loss": 0.5031678080558777, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 6910.266147708002, |
|
"learning_rate": 4.2746156931490756e-08, |
|
"logits": -0.4682956337928772, |
|
"logps": -75.585205078125, |
|
"loss": 265.7935, |
|
"objective": 275.0522155761719, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.12777787446975708, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.709902822971344, |
|
"eval_logits": -0.5704091191291809, |
|
"eval_logps": -81.14698028564453, |
|
"eval_loss": 578.3192138671875, |
|
"eval_objective": 566.560791015625, |
|
"eval_ranking_simple": 0.5367494821548462, |
|
"eval_regularize": 0.5901817083358765, |
|
"eval_runtime": 368.6553, |
|
"eval_samples_per_second": 15.706, |
|
"eval_steps_per_second": 1.31, |
|
"step": 1000 |
|
}, |
|
{ |
|
"dpo_loss": 0.5090419054031372, |
|
"epoch": 2.848370335380255, |
|
"grad_norm": 7970.005574260741, |
|
"learning_rate": 3.547098331040916e-08, |
|
"logits": -0.48795023560523987, |
|
"logps": -76.1548843383789, |
|
"loss": 272.1969, |
|
"objective": 268.9000244140625, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.12190086394548416, |
|
"step": 1005 |
|
}, |
|
{ |
|
"dpo_loss": 0.5107741355895996, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 7994.674781206224, |
|
"learning_rate": 2.8869587314321324e-08, |
|
"logits": -0.5256258845329285, |
|
"logps": -76.37800598144531, |
|
"loss": 282.5812, |
|
"objective": 272.2499084472656, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.11501624435186386, |
|
"step": 1010 |
|
}, |
|
{ |
|
"dpo_loss": 0.5150614380836487, |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 7261.730587266494, |
|
"learning_rate": 2.2943773698977935e-08, |
|
"logits": -0.4966468811035156, |
|
"logps": -75.5634994506836, |
|
"loss": 260.7579, |
|
"objective": 261.33038330078125, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.11271250247955322, |
|
"step": 1015 |
|
}, |
|
{ |
|
"dpo_loss": 0.508990466594696, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 7841.280059779772, |
|
"learning_rate": 1.7695162522652352e-08, |
|
"logits": -0.4532057046890259, |
|
"logps": -77.1956558227539, |
|
"loss": 271.9978, |
|
"objective": 272.5274963378906, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.12634117901325226, |
|
"step": 1020 |
|
}, |
|
{ |
|
"dpo_loss": 0.5181344151496887, |
|
"epoch": 2.9050543221539913, |
|
"grad_norm": 7685.424658753134, |
|
"learning_rate": 1.3125188703233815e-08, |
|
"logits": -0.53793865442276, |
|
"logps": -77.12080383300781, |
|
"loss": 268.2244, |
|
"objective": 275.9144287109375, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.12161087244749069, |
|
"step": 1025 |
|
}, |
|
{ |
|
"dpo_loss": 0.5186858773231506, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 7664.878619416483, |
|
"learning_rate": 9.235101625932885e-09, |
|
"logits": -0.535256564617157, |
|
"logps": -76.86356353759766, |
|
"loss": 277.9306, |
|
"objective": 276.3569641113281, |
|
"ranking_simple": 0.5541666746139526, |
|
"regularize": 0.13014444708824158, |
|
"step": 1030 |
|
}, |
|
{ |
|
"dpo_loss": 0.5086424350738525, |
|
"epoch": 2.9333963155408598, |
|
"grad_norm": 7176.213599878172, |
|
"learning_rate": 6.025964801714412e-09, |
|
"logits": -0.4943471848964691, |
|
"logps": -77.83872985839844, |
|
"loss": 266.904, |
|
"objective": 264.484375, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.11565522104501724, |
|
"step": 1035 |
|
}, |
|
{ |
|
"dpo_loss": 0.5082514882087708, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 7034.435559100273, |
|
"learning_rate": 3.4986555765434415e-09, |
|
"logits": -0.5228769779205322, |
|
"logps": -77.66551208496094, |
|
"loss": 271.848, |
|
"objective": 274.83355712890625, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.11535345017910004, |
|
"step": 1040 |
|
}, |
|
{ |
|
"dpo_loss": 0.5133834481239319, |
|
"epoch": 2.961738308927728, |
|
"grad_norm": 7599.023611459706, |
|
"learning_rate": 1.6538648915270794e-09, |
|
"logits": -0.47324731945991516, |
|
"logps": -79.62892150878906, |
|
"loss": 268.8998, |
|
"objective": 262.8771667480469, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.12152813374996185, |
|
"step": 1045 |
|
}, |
|
{ |
|
"dpo_loss": 0.5033511519432068, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 8022.09759775128, |
|
"learning_rate": 4.920970940180958e-10, |
|
"logits": -0.5222968459129333, |
|
"logps": -75.3882827758789, |
|
"loss": 265.6855, |
|
"objective": 262.70672607421875, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.1311034858226776, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.975909305621162, |
|
"eval_dpo_loss": 0.7098360061645508, |
|
"eval_logits": -0.5715492367744446, |
|
"eval_logps": -81.13370513916016, |
|
"eval_loss": 578.1592407226562, |
|
"eval_objective": 566.3953857421875, |
|
"eval_ranking_simple": 0.5362318754196167, |
|
"eval_regularize": 0.5898736715316772, |
|
"eval_runtime": 367.7364, |
|
"eval_samples_per_second": 15.745, |
|
"eval_steps_per_second": 1.313, |
|
"step": 1050 |
|
}, |
|
{ |
|
"dpo_loss": 0.5144506692886353, |
|
"epoch": 2.9900803023145963, |
|
"grad_norm": 7870.107583704036, |
|
"learning_rate": 1.3669799732163314e-11, |
|
"logits": -0.4851941168308258, |
|
"logps": -76.2236099243164, |
|
"loss": 272.1204, |
|
"objective": 281.3744201660156, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.13684484362602234, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.992914501653283, |
|
"step": 1056, |
|
"total_flos": 0.0, |
|
"train_loss": 356.3672220056707, |
|
"train_runtime": 34691.2758, |
|
"train_samples_per_second": 4.393, |
|
"train_steps_per_second": 0.03 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1056, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|