hZzy's picture
Model save
0f4f8ca verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.988190836088805,
"eval_steps": 50,
"global_step": 880,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"dpo_loss": 0.6931471824645996,
"epoch": 0.005668398677373642,
"grad_norm": 13.413595204669061,
"learning_rate": 5.681818181818182e-08,
"logits": -1.3147305250167847,
"logps": -88.0877456665039,
"loss": 0.4113,
"objective": 0.41588976979255676,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.5208333134651184,
"regularize": 0.41588976979255676,
"step": 1
},
{
"dpo_loss": 0.6931002736091614,
"epoch": 0.02834199338686821,
"grad_norm": 13.427332141277578,
"learning_rate": 2.840909090909091e-07,
"logits": -1.3680692911148071,
"logps": -84.26158905029297,
"loss": 0.4129,
"objective": 0.3754810094833374,
"ranking_idealized": 0.6510416865348816,
"ranking_idealized_expo": 0.5572916865348816,
"ranking_simple": 0.546875,
"regularize": 0.3754810094833374,
"step": 5
},
{
"dpo_loss": 0.6900920867919922,
"epoch": 0.05668398677373642,
"grad_norm": 12.67240246603534,
"learning_rate": 5.681818181818182e-07,
"logits": -1.4469478130340576,
"logps": -82.44185638427734,
"loss": 0.4149,
"objective": 0.43780091404914856,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.5166666507720947,
"regularize": 0.43780091404914856,
"step": 10
},
{
"dpo_loss": 0.6891883015632629,
"epoch": 0.08502598016060463,
"grad_norm": 12.900443318480342,
"learning_rate": 8.522727272727273e-07,
"logits": -1.4273536205291748,
"logps": -81.69231414794922,
"loss": 0.419,
"objective": 0.40471941232681274,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.512499988079071,
"regularize": 0.40471941232681274,
"step": 15
},
{
"dpo_loss": 0.6918003559112549,
"epoch": 0.11336797354747284,
"grad_norm": 14.313913436693964,
"learning_rate": 1.1363636363636364e-06,
"logits": -1.437472939491272,
"logps": -82.81884765625,
"loss": 0.4037,
"objective": 0.403365820646286,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5333333611488342,
"regularize": 0.403365820646286,
"step": 20
},
{
"dpo_loss": 0.6768646836280823,
"epoch": 0.14170996693434104,
"grad_norm": 13.58368371280586,
"learning_rate": 1.4204545454545458e-06,
"logits": -1.5096564292907715,
"logps": -82.65319061279297,
"loss": 0.3931,
"objective": 0.409546822309494,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.5041666626930237,
"regularize": 0.409546822309494,
"step": 25
},
{
"dpo_loss": 0.6711666584014893,
"epoch": 0.17005196032120926,
"grad_norm": 13.105526207742551,
"learning_rate": 1.7045454545454546e-06,
"logits": -1.5050361156463623,
"logps": -83.46080780029297,
"loss": 0.3841,
"objective": 0.38422220945358276,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5666666626930237,
"regularize": 0.38422220945358276,
"step": 30
},
{
"dpo_loss": 0.6604105234146118,
"epoch": 0.19839395370807747,
"grad_norm": 14.531223489181945,
"learning_rate": 1.9886363636363638e-06,
"logits": -1.5017287731170654,
"logps": -84.02853393554688,
"loss": 0.3722,
"objective": 0.35262614488601685,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.5333333611488342,
"regularize": 0.35262614488601685,
"step": 35
},
{
"dpo_loss": 0.6500855684280396,
"epoch": 0.22673594709494568,
"grad_norm": 14.371511230761167,
"learning_rate": 2.2727272727272728e-06,
"logits": -1.5274395942687988,
"logps": -84.69414520263672,
"loss": 0.379,
"objective": 0.3910427689552307,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.5583333373069763,
"regularize": 0.3910427689552307,
"step": 40
},
{
"dpo_loss": 0.639613151550293,
"epoch": 0.25507794048181387,
"grad_norm": 17.129866291690814,
"learning_rate": 2.556818181818182e-06,
"logits": -1.6906952857971191,
"logps": -86.7696304321289,
"loss": 0.3764,
"objective": 0.4166857898235321,
"ranking_idealized": 0.7208333611488342,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.5458333492279053,
"regularize": 0.4166857898235321,
"step": 45
},
{
"dpo_loss": 0.637481153011322,
"epoch": 0.2834199338686821,
"grad_norm": 12.595130895032831,
"learning_rate": 2.8409090909090916e-06,
"logits": -1.604835033416748,
"logps": -87.7275161743164,
"loss": 0.3566,
"objective": 0.3444138169288635,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.42500001192092896,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3444138169288635,
"step": 50
},
{
"epoch": 0.2834199338686821,
"eval_dpo_loss": 0.6897606253623962,
"eval_logits": -1.6203083992004395,
"eval_logps": -96.4831314086914,
"eval_loss": 0.413291871547699,
"eval_objective": 0.4237224757671356,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5165289044380188,
"eval_regularize": 0.4237224757671356,
"eval_runtime": 259.6612,
"eval_samples_per_second": 22.298,
"eval_steps_per_second": 0.932,
"step": 50
},
{
"dpo_loss": 0.6459915041923523,
"epoch": 0.3117619272555503,
"grad_norm": 12.272365561333357,
"learning_rate": 3.125e-06,
"logits": -1.6675435304641724,
"logps": -91.76095581054688,
"loss": 0.3508,
"objective": 0.359805166721344,
"ranking_idealized": 0.7124999761581421,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.5625,
"regularize": 0.359805166721344,
"step": 55
},
{
"dpo_loss": 0.6238431334495544,
"epoch": 0.3401039206424185,
"grad_norm": 13.401317383285344,
"learning_rate": 3.409090909090909e-06,
"logits": -1.6435742378234863,
"logps": -88.34529113769531,
"loss": 0.3357,
"objective": 0.3332988917827606,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5791666507720947,
"regularize": 0.3332988917827606,
"step": 60
},
{
"dpo_loss": 0.6304014325141907,
"epoch": 0.3684459140292867,
"grad_norm": 12.713715258382189,
"learning_rate": 3.6931818181818186e-06,
"logits": -1.5630245208740234,
"logps": -86.62226867675781,
"loss": 0.3325,
"objective": 0.3380378782749176,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3380378782749176,
"step": 65
},
{
"dpo_loss": 0.627398669719696,
"epoch": 0.39678790741615494,
"grad_norm": 12.34517477378158,
"learning_rate": 3.9772727272727275e-06,
"logits": -1.519757628440857,
"logps": -88.53465270996094,
"loss": 0.3261,
"objective": 0.3733421266078949,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5833333134651184,
"regularize": 0.3733421266078949,
"step": 70
},
{
"dpo_loss": 0.618600070476532,
"epoch": 0.42512990080302315,
"grad_norm": 13.394680047264217,
"learning_rate": 4.2613636363636365e-06,
"logits": -1.444022297859192,
"logps": -86.64772033691406,
"loss": 0.3211,
"objective": 0.3317987322807312,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.612500011920929,
"regularize": 0.3317987322807312,
"step": 75
},
{
"dpo_loss": 0.6121571660041809,
"epoch": 0.45347189418989137,
"grad_norm": 12.637476273331325,
"learning_rate": 4.5454545454545455e-06,
"logits": -1.5524269342422485,
"logps": -82.45478057861328,
"loss": 0.3164,
"objective": 0.33297327160835266,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.5833333134651184,
"regularize": 0.33297327160835266,
"step": 80
},
{
"dpo_loss": 0.5934053659439087,
"epoch": 0.4818138875767596,
"grad_norm": 11.681739450816789,
"learning_rate": 4.829545454545455e-06,
"logits": -1.5023764371871948,
"logps": -83.81694030761719,
"loss": 0.3085,
"objective": 0.2888755202293396,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6166666746139526,
"regularize": 0.2888755202293396,
"step": 85
},
{
"dpo_loss": 0.6050879955291748,
"epoch": 0.5101558809636277,
"grad_norm": 11.586500005637587,
"learning_rate": 4.999921328558333e-06,
"logits": -1.246580958366394,
"logps": -88.33258819580078,
"loss": 0.3085,
"objective": 0.3082594871520996,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.5458333492279053,
"regularize": 0.3082594871520996,
"step": 90
},
{
"dpo_loss": 0.5785138010978699,
"epoch": 0.538497874350496,
"grad_norm": 11.1585869550919,
"learning_rate": 4.999036331701828e-06,
"logits": -1.2436394691467285,
"logps": -85.56443786621094,
"loss": 0.3059,
"objective": 0.293775349855423,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6208333373069763,
"regularize": 0.293775349855423,
"step": 95
},
{
"dpo_loss": 0.5692493319511414,
"epoch": 0.5668398677373642,
"grad_norm": 10.57766394132754,
"learning_rate": 4.997168347957521e-06,
"logits": -1.3053488731384277,
"logps": -83.31777954101562,
"loss": 0.3027,
"objective": 0.31831109523773193,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6083333492279053,
"regularize": 0.31831109523773193,
"step": 100
},
{
"epoch": 0.5668398677373642,
"eval_dpo_loss": 0.686188817024231,
"eval_logits": -1.3063371181488037,
"eval_logps": -88.41004180908203,
"eval_loss": 0.41417956352233887,
"eval_objective": 0.41506800055503845,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5216942429542542,
"eval_regularize": 0.41506800055503845,
"eval_runtime": 259.1694,
"eval_samples_per_second": 22.341,
"eval_steps_per_second": 0.934,
"step": 100
},
{
"dpo_loss": 0.5781983733177185,
"epoch": 0.5951818611242324,
"grad_norm": 10.454612496016592,
"learning_rate": 4.994318112090048e-06,
"logits": -1.1954314708709717,
"logps": -83.58454132080078,
"loss": 0.3006,
"objective": 0.31248193979263306,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.637499988079071,
"regularize": 0.31248193979263306,
"step": 105
},
{
"dpo_loss": 0.5739426016807556,
"epoch": 0.6235238545111006,
"grad_norm": 10.705962678396897,
"learning_rate": 4.990486745229364e-06,
"logits": -1.4220352172851562,
"logps": -82.55160522460938,
"loss": 0.3033,
"objective": 0.31312334537506104,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.6000000238418579,
"regularize": 0.31312334537506104,
"step": 110
},
{
"dpo_loss": 0.5884331464767456,
"epoch": 0.6518658478979689,
"grad_norm": 10.243480321598604,
"learning_rate": 4.985675754429744e-06,
"logits": -1.5664387941360474,
"logps": -80.2437515258789,
"loss": 0.2914,
"objective": 0.29273518919944763,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5708333253860474,
"regularize": 0.29273518919944763,
"step": 115
},
{
"dpo_loss": 0.5871846079826355,
"epoch": 0.680207841284837,
"grad_norm": 9.272918607237454,
"learning_rate": 4.9798870320769884e-06,
"logits": -1.5533816814422607,
"logps": -77.25326538085938,
"loss": 0.2962,
"objective": 0.27850577235221863,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6166666746139526,
"regularize": 0.27850577235221863,
"step": 120
},
{
"dpo_loss": 0.5849189162254333,
"epoch": 0.7085498346717053,
"grad_norm": 9.087139481090837,
"learning_rate": 4.973122855144066e-06,
"logits": -1.4378304481506348,
"logps": -76.45697784423828,
"loss": 0.286,
"objective": 0.27717408537864685,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6541666388511658,
"regularize": 0.27717408537864685,
"step": 125
},
{
"dpo_loss": 0.5918333530426025,
"epoch": 0.7368918280585735,
"grad_norm": 9.798401176649499,
"learning_rate": 4.965385884295467e-06,
"logits": -1.5077089071273804,
"logps": -75.78624725341797,
"loss": 0.2914,
"objective": 0.29082292318344116,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.4541666805744171,
"ranking_simple": 0.5625,
"regularize": 0.29082292318344116,
"step": 130
},
{
"dpo_loss": 0.5844586491584778,
"epoch": 0.7652338214454416,
"grad_norm": 9.388728292193363,
"learning_rate": 4.956679162840646e-06,
"logits": -1.4935728311538696,
"logps": -77.08039093017578,
"loss": 0.2751,
"objective": 0.27092453837394714,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.5708333253860474,
"regularize": 0.27092453837394714,
"step": 135
},
{
"dpo_loss": 0.5776726007461548,
"epoch": 0.7935758148323099,
"grad_norm": 9.368061718815978,
"learning_rate": 4.947006115536947e-06,
"logits": -1.344637393951416,
"logps": -79.50406646728516,
"loss": 0.278,
"objective": 0.272522896528244,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.5708333253860474,
"regularize": 0.272522896528244,
"step": 140
},
{
"dpo_loss": 0.583095908164978,
"epoch": 0.821917808219178,
"grad_norm": 8.887316448589162,
"learning_rate": 4.9363705472424825e-06,
"logits": -1.260974645614624,
"logps": -80.34443664550781,
"loss": 0.2702,
"objective": 0.27244552969932556,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.5791666507720947,
"regularize": 0.27244552969932556,
"step": 145
},
{
"dpo_loss": 0.5877144932746887,
"epoch": 0.8502598016060463,
"grad_norm": 8.678793910307522,
"learning_rate": 4.924776641419513e-06,
"logits": -1.168263554573059,
"logps": -79.24138641357422,
"loss": 0.2706,
"objective": 0.26840564608573914,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6625000238418579,
"regularize": 0.26840564608573914,
"step": 150
},
{
"epoch": 0.8502598016060463,
"eval_dpo_loss": 0.6856931447982788,
"eval_logits": -1.1981431245803833,
"eval_logps": -87.367431640625,
"eval_loss": 0.42615318298339844,
"eval_objective": 0.42774394154548645,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5278925895690918,
"eval_regularize": 0.42774394154548645,
"eval_runtime": 258.5616,
"eval_samples_per_second": 22.393,
"eval_steps_per_second": 0.936,
"step": 150
},
{
"dpo_loss": 0.5757229924201965,
"epoch": 0.8786017949929145,
"grad_norm": 9.10049250548801,
"learning_rate": 4.9122289584888926e-06,
"logits": -1.1876070499420166,
"logps": -79.34397888183594,
"loss": 0.2634,
"objective": 0.2684144973754883,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6416666507720947,
"regularize": 0.2684144973754883,
"step": 155
},
{
"dpo_loss": 0.5666177868843079,
"epoch": 0.9069437883797827,
"grad_norm": 8.820116531265606,
"learning_rate": 4.8987324340362445e-06,
"logits": -1.129431962966919,
"logps": -79.80657196044922,
"loss": 0.2548,
"objective": 0.24889370799064636,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6208333373069763,
"regularize": 0.24889370799064636,
"step": 160
},
{
"dpo_loss": 0.5674318671226501,
"epoch": 0.9352857817666509,
"grad_norm": 8.768643401715794,
"learning_rate": 4.884292376870567e-06,
"logits": -1.08602774143219,
"logps": -80.07511138916016,
"loss": 0.2569,
"objective": 0.24699881672859192,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6416666507720947,
"regularize": 0.24699881672859192,
"step": 165
},
{
"dpo_loss": 0.5818125605583191,
"epoch": 0.9636277751535192,
"grad_norm": 8.727560270319945,
"learning_rate": 4.868914466936038e-06,
"logits": -1.1042813062667847,
"logps": -81.02680206298828,
"loss": 0.2644,
"objective": 0.2629285454750061,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6166666746139526,
"regularize": 0.2629285454750061,
"step": 170
},
{
"dpo_loss": 0.5690818428993225,
"epoch": 0.9919697685403873,
"grad_norm": 9.360612159540104,
"learning_rate": 4.8526047530778175e-06,
"logits": -1.0876260995864868,
"logps": -81.20608520507812,
"loss": 0.257,
"objective": 0.2705513834953308,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.675000011920929,
"regularize": 0.2705513834953308,
"step": 175
},
{
"dpo_loss": 0.5392836332321167,
"epoch": 1.0203117619272555,
"grad_norm": 8.811963294729349,
"learning_rate": 4.835369650662767e-06,
"logits": -1.2233085632324219,
"logps": -79.34477233886719,
"loss": 0.2467,
"objective": 0.25315728783607483,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.6499999761581421,
"regularize": 0.25315728783607483,
"step": 180
},
{
"dpo_loss": 0.5429127812385559,
"epoch": 1.0486537553141237,
"grad_norm": 7.990993827783984,
"learning_rate": 4.817215939055984e-06,
"logits": -1.0636595487594604,
"logps": -77.5766830444336,
"loss": 0.2395,
"objective": 0.23463593423366547,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6083333492279053,
"regularize": 0.23463593423366547,
"step": 185
},
{
"dpo_loss": 0.5637180209159851,
"epoch": 1.076995748700992,
"grad_norm": 8.256999336661497,
"learning_rate": 4.798150758954164e-06,
"logits": -1.0749539136886597,
"logps": -78.55563354492188,
"loss": 0.2191,
"objective": 0.21444876492023468,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6541666388511658,
"regularize": 0.2144487500190735,
"step": 190
},
{
"dpo_loss": 0.5680408477783203,
"epoch": 1.10533774208786,
"grad_norm": 8.413792498503495,
"learning_rate": 4.778181609576832e-06,
"logits": -1.1007658243179321,
"logps": -77.28519439697266,
"loss": 0.2178,
"objective": 0.21195411682128906,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.6666666865348816,
"regularize": 0.21195411682128906,
"step": 195
},
{
"dpo_loss": 0.5640944838523865,
"epoch": 1.1336797354747283,
"grad_norm": 7.655804437668192,
"learning_rate": 4.757316345716554e-06,
"logits": -1.2176551818847656,
"logps": -76.59423828125,
"loss": 0.2256,
"objective": 0.2188282459974289,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6333333253860474,
"regularize": 0.2188282459974289,
"step": 200
},
{
"epoch": 1.1336797354747283,
"eval_dpo_loss": 0.6861998438835144,
"eval_logits": -1.2023168802261353,
"eval_logps": -81.81192016601562,
"eval_loss": 0.43468645215034485,
"eval_objective": 0.434445858001709,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5247933864593506,
"eval_regularize": 0.434445858001709,
"eval_runtime": 259.1196,
"eval_samples_per_second": 22.345,
"eval_steps_per_second": 0.934,
"step": 200
},
{
"dpo_loss": 0.5600470304489136,
"epoch": 1.1620217288615966,
"grad_norm": 7.695005611618307,
"learning_rate": 4.735563174649278e-06,
"logits": -1.1184250116348267,
"logps": -77.62281799316406,
"loss": 0.2243,
"objective": 0.22593899071216583,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6000000238418579,
"regularize": 0.22593899071216583,
"step": 205
},
{
"dpo_loss": 0.5418952107429504,
"epoch": 1.1903637222484649,
"grad_norm": 7.898188189840939,
"learning_rate": 4.7129306529060415e-06,
"logits": -1.0059646368026733,
"logps": -78.90467834472656,
"loss": 0.2215,
"objective": 0.21866732835769653,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.612500011920929,
"regularize": 0.21866732835769653,
"step": 210
},
{
"dpo_loss": 0.5478299856185913,
"epoch": 1.2187057156353331,
"grad_norm": 8.602045809831973,
"learning_rate": 4.68942768290728e-06,
"logits": -0.9527910947799683,
"logps": -78.7351303100586,
"loss": 0.2113,
"objective": 0.21119125187397003,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.625,
"regularize": 0.21119123697280884,
"step": 215
},
{
"dpo_loss": 0.5662825107574463,
"epoch": 1.2470477090222012,
"grad_norm": 8.077787041174624,
"learning_rate": 4.665063509461098e-06,
"logits": -0.8720958828926086,
"logps": -76.756591796875,
"loss": 0.2138,
"objective": 0.20959888398647308,
"ranking_idealized": 0.7208333611488342,
"ranking_idealized_expo": 0.6041666865348816,
"ranking_simple": 0.6958333253860474,
"regularize": 0.20959888398647308,
"step": 220
},
{
"dpo_loss": 0.5461317300796509,
"epoch": 1.2753897024090695,
"grad_norm": 7.874404608360002,
"learning_rate": 4.639847716126855e-06,
"logits": -0.9673039317131042,
"logps": -78.00302124023438,
"loss": 0.2154,
"objective": 0.20724421739578247,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6416666507720947,
"regularize": 0.20724421739578247,
"step": 225
},
{
"dpo_loss": 0.5674420595169067,
"epoch": 1.3037316957959377,
"grad_norm": 7.50398022531709,
"learning_rate": 4.613790221445511e-06,
"logits": -0.9144200682640076,
"logps": -78.14189910888672,
"loss": 0.2025,
"objective": 0.21042712032794952,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.637499988079071,
"regularize": 0.21042712032794952,
"step": 230
},
{
"dpo_loss": 0.5355426669120789,
"epoch": 1.3320736891828058,
"grad_norm": 7.978556347314524,
"learning_rate": 4.586901275038201e-06,
"logits": -1.0436056852340698,
"logps": -76.17823791503906,
"loss": 0.2072,
"objective": 0.19967219233512878,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6291666626930237,
"regularize": 0.19967219233512878,
"step": 235
},
{
"dpo_loss": 0.5373556613922119,
"epoch": 1.360415682569674,
"grad_norm": 7.6373820167513635,
"learning_rate": 4.559191453574582e-06,
"logits": -0.9692198038101196,
"logps": -78.42900085449219,
"loss": 0.1972,
"objective": 0.19282637536525726,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.6458333134651184,
"regularize": 0.19282637536525726,
"step": 240
},
{
"dpo_loss": 0.5545187592506409,
"epoch": 1.3887576759565423,
"grad_norm": 7.35404104460057,
"learning_rate": 4.530671656612544e-06,
"logits": -0.992374062538147,
"logps": -76.8333969116211,
"loss": 0.1965,
"objective": 0.19396263360977173,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.625,
"regularize": 0.19396263360977173,
"step": 245
},
{
"dpo_loss": 0.536364734172821,
"epoch": 1.4170996693434104,
"grad_norm": 7.62812382511743,
"learning_rate": 4.501353102310901e-06,
"logits": -0.9773390293121338,
"logps": -76.92967224121094,
"loss": 0.2005,
"objective": 0.2195345014333725,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.625,
"regularize": 0.2195345014333725,
"step": 250
},
{
"epoch": 1.4170996693434104,
"eval_dpo_loss": 0.6815471053123474,
"eval_logits": -1.0615853071212769,
"eval_logps": -81.82124328613281,
"eval_loss": 0.42915773391723633,
"eval_objective": 0.4288579821586609,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5227272510528564,
"eval_regularize": 0.4288579821586609,
"eval_runtime": 259.2538,
"eval_samples_per_second": 22.333,
"eval_steps_per_second": 0.933,
"step": 250
},
{
"dpo_loss": 0.5536395907402039,
"epoch": 1.4454416627302786,
"grad_norm": 7.608994157416838,
"learning_rate": 4.4712473230167775e-06,
"logits": -0.9322084784507751,
"logps": -77.44235229492188,
"loss": 0.1968,
"objective": 0.1830952763557434,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6499999761581421,
"regularize": 0.1830952763557434,
"step": 255
},
{
"dpo_loss": 0.5560018420219421,
"epoch": 1.473783656117147,
"grad_norm": 7.9576485937673995,
"learning_rate": 4.440366160729393e-06,
"logits": -0.9907886385917664,
"logps": -77.4918212890625,
"loss": 0.197,
"objective": 0.20965854823589325,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6333333253860474,
"regularize": 0.20965854823589325,
"step": 260
},
{
"dpo_loss": 0.5228397250175476,
"epoch": 1.5021256495040152,
"grad_norm": 7.710193409626713,
"learning_rate": 4.4087217624420595e-06,
"logits": -0.9860392808914185,
"logps": -76.1131362915039,
"loss": 0.1993,
"objective": 0.18088673055171967,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6333333253860474,
"regularize": 0.18088671565055847,
"step": 265
},
{
"dpo_loss": 0.5495325922966003,
"epoch": 1.5304676428908834,
"grad_norm": 7.266856200970056,
"learning_rate": 4.376326575364206e-06,
"logits": -0.9504061341285706,
"logps": -77.44710540771484,
"loss": 0.1911,
"objective": 0.1995639055967331,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.637499988079071,
"regularize": 0.1995639055967331,
"step": 270
},
{
"dpo_loss": 0.5499274730682373,
"epoch": 1.5588096362777515,
"grad_norm": 7.524550189383748,
"learning_rate": 4.34319334202531e-06,
"logits": -0.959584653377533,
"logps": -77.12781524658203,
"loss": 0.1924,
"objective": 0.20593222975730896,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6833333373069763,
"regularize": 0.20593222975730896,
"step": 275
},
{
"dpo_loss": 0.5305771827697754,
"epoch": 1.5871516296646198,
"grad_norm": 7.773566185560746,
"learning_rate": 4.309335095262675e-06,
"logits": -0.855711042881012,
"logps": -74.90408325195312,
"loss": 0.1869,
"objective": 0.18780963122844696,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6416666507720947,
"regularize": 0.18780963122844696,
"step": 280
},
{
"dpo_loss": 0.5325539112091064,
"epoch": 1.615493623051488,
"grad_norm": 7.526664818437735,
"learning_rate": 4.274765153095008e-06,
"logits": -0.8781672120094299,
"logps": -76.42524719238281,
"loss": 0.1948,
"objective": 0.18717069923877716,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6583333611488342,
"regularize": 0.18717069923877716,
"step": 285
},
{
"dpo_loss": 0.5575224757194519,
"epoch": 1.643835616438356,
"grad_norm": 6.835237556478757,
"learning_rate": 4.239497113483819e-06,
"logits": -0.8322954773902893,
"logps": -74.6546859741211,
"loss": 0.1789,
"objective": 0.182403564453125,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6625000238418579,
"regularize": 0.182403564453125,
"step": 290
},
{
"dpo_loss": 0.5432038903236389,
"epoch": 1.6721776098252243,
"grad_norm": 6.961571934316881,
"learning_rate": 4.203544848984729e-06,
"logits": -0.8109145760536194,
"logps": -72.75186920166016,
"loss": 0.1859,
"objective": 0.18214626610279083,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6416666507720947,
"regularize": 0.18214626610279083,
"step": 295
},
{
"dpo_loss": 0.5352488160133362,
"epoch": 1.7005196032120926,
"grad_norm": 7.403474665890531,
"learning_rate": 4.16692250129073e-06,
"logits": -0.9637666344642639,
"logps": -74.78925323486328,
"loss": 0.187,
"objective": 0.20280833542346954,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.574999988079071,
"ranking_simple": 0.6958333253860474,
"regularize": 0.20280833542346954,
"step": 300
},
{
"epoch": 1.7005196032120926,
"eval_dpo_loss": 0.6845287680625916,
"eval_logits": -1.0398027896881104,
"eval_logps": -80.0077133178711,
"eval_loss": 0.43694496154785156,
"eval_objective": 0.43617644906044006,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5258264541625977,
"eval_regularize": 0.43617644906044006,
"eval_runtime": 259.8028,
"eval_samples_per_second": 22.286,
"eval_steps_per_second": 0.931,
"step": 300
},
{
"dpo_loss": 0.5487022399902344,
"epoch": 1.7288615965989607,
"grad_norm": 7.543811016492746,
"learning_rate": 4.129644475669617e-06,
"logits": -0.9759048223495483,
"logps": -74.88541412353516,
"loss": 0.1848,
"objective": 0.18824300169944763,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.625,
"regularize": 0.18824300169944763,
"step": 305
},
{
"dpo_loss": 0.5396175980567932,
"epoch": 1.7572035899858292,
"grad_norm": 7.45667126547559,
"learning_rate": 4.091725435297721e-06,
"logits": -1.014137625694275,
"logps": -71.69686126708984,
"loss": 0.187,
"objective": 0.18091975152492523,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6499999761581421,
"regularize": 0.18091975152492523,
"step": 310
},
{
"dpo_loss": 0.5542029142379761,
"epoch": 1.7855455833726972,
"grad_norm": 7.022791945824948,
"learning_rate": 4.053180295492203e-06,
"logits": -0.9052151441574097,
"logps": -72.4874038696289,
"loss": 0.1773,
"objective": 0.17407093942165375,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.612500011920929,
"regularize": 0.17407093942165375,
"step": 315
},
{
"dpo_loss": 0.5506651401519775,
"epoch": 1.8138875767595655,
"grad_norm": 7.473980140791967,
"learning_rate": 4.014024217844167e-06,
"logits": -0.9277843832969666,
"logps": -75.52190399169922,
"loss": 0.1782,
"objective": 0.19924014806747437,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.637499988079071,
"regularize": 0.19924013316631317,
"step": 320
},
{
"dpo_loss": 0.5471735000610352,
"epoch": 1.8422295701464337,
"grad_norm": 7.500911105092317,
"learning_rate": 3.974272604254906e-06,
"logits": -0.9084165096282959,
"logps": -77.74280548095703,
"loss": 0.1791,
"objective": 0.18182264268398285,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6875,
"regularize": 0.18182264268398285,
"step": 325
},
{
"dpo_loss": 0.5376237034797668,
"epoch": 1.8705715635333018,
"grad_norm": 6.956332723343249,
"learning_rate": 3.933941090877615e-06,
"logits": -0.6826075911521912,
"logps": -74.37185668945312,
"loss": 0.1748,
"objective": 0.15574544668197632,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.6499999761581421,
"regularize": 0.15574544668197632,
"step": 330
},
{
"dpo_loss": 0.5471087694168091,
"epoch": 1.89891355692017,
"grad_norm": 7.004792858354732,
"learning_rate": 3.893045541966975e-06,
"logits": -0.8625032901763916,
"logps": -73.06546020507812,
"loss": 0.1666,
"objective": 0.18181832134723663,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6291666626930237,
"regularize": 0.18181832134723663,
"step": 335
},
{
"dpo_loss": 0.5468536615371704,
"epoch": 1.9272555503070383,
"grad_norm": 7.263082545218723,
"learning_rate": 3.8516020436389945e-06,
"logits": -0.8647869825363159,
"logps": -74.24337005615234,
"loss": 0.1658,
"objective": 0.16866040229797363,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.7166666388511658,
"regularize": 0.16866040229797363,
"step": 340
},
{
"dpo_loss": 0.5612522959709167,
"epoch": 1.9555975436939064,
"grad_norm": 7.17931193885588,
"learning_rate": 3.8096268975436045e-06,
"logits": -0.9274277091026306,
"logps": -72.64501953125,
"loss": 0.1631,
"objective": 0.1834598332643509,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6041666865348816,
"regularize": 0.1834598332643509,
"step": 345
},
{
"dpo_loss": 0.5486599802970886,
"epoch": 1.9839395370807746,
"grad_norm": 7.4574942962802675,
"learning_rate": 3.767136614452458e-06,
"logits": -0.8831450343132019,
"logps": -75.68425750732422,
"loss": 0.1664,
"objective": 0.15772633254528046,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.6166666746139526,
"regularize": 0.15772633254528046,
"step": 350
},
{
"epoch": 1.9839395370807746,
"eval_dpo_loss": 0.6841984987258911,
"eval_logits": -0.9982038140296936,
"eval_logps": -79.63081359863281,
"eval_loss": 0.4382030665874481,
"eval_objective": 0.4358615577220917,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5289255976676941,
"eval_regularize": 0.4358615577220917,
"eval_runtime": 258.5329,
"eval_samples_per_second": 22.396,
"eval_steps_per_second": 0.936,
"step": 350
},
{
"dpo_loss": 0.5294139981269836,
"epoch": 2.012281530467643,
"grad_norm": 7.097987961305298,
"learning_rate": 3.724147907764478e-06,
"logits": -0.8353627324104309,
"logps": -75.18488311767578,
"loss": 0.1556,
"objective": 0.14675287902355194,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.675000011920929,
"regularize": 0.14675287902355194,
"step": 355
},
{
"dpo_loss": 0.5278249979019165,
"epoch": 2.040623523854511,
"grad_norm": 7.097858061871919,
"learning_rate": 3.6806776869317074e-06,
"logits": -0.8662230372428894,
"logps": -74.37833404541016,
"loss": 0.1484,
"objective": 0.13966700434684753,
"ranking_idealized": 0.7250000238418579,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6916666626930237,
"regularize": 0.13966700434684753,
"step": 360
},
{
"dpo_loss": 0.5365410447120667,
"epoch": 2.0689655172413794,
"grad_norm": 7.018046135257267,
"learning_rate": 3.6367430508080283e-06,
"logits": -0.9098676443099976,
"logps": -77.11681365966797,
"loss": 0.1521,
"objective": 0.14031733572483063,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6541666388511658,
"regularize": 0.14031733572483063,
"step": 365
},
{
"dpo_loss": 0.5375342965126038,
"epoch": 2.0973075106282475,
"grad_norm": 6.853083763524368,
"learning_rate": 3.5923612809233987e-06,
"logits": -0.8166040778160095,
"logps": -74.46318817138672,
"loss": 0.1422,
"objective": 0.14259177446365356,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6499999761581421,
"regularize": 0.14259177446365356,
"step": 370
},
{
"dpo_loss": 0.5261290669441223,
"epoch": 2.1256495040151155,
"grad_norm": 6.737212175634408,
"learning_rate": 3.547549834686222e-06,
"logits": -0.7999446392059326,
"logps": -76.43958282470703,
"loss": 0.1396,
"objective": 0.14750143885612488,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.7124999761581421,
"regularize": 0.14750143885612488,
"step": 375
},
{
"dpo_loss": 0.5414277911186218,
"epoch": 2.153991497401984,
"grad_norm": 7.256488804126431,
"learning_rate": 3.5023263385165346e-06,
"logits": -0.7813270092010498,
"logps": -75.50519561767578,
"loss": 0.1433,
"objective": 0.14621533453464508,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.675000011920929,
"regularize": 0.14621533453464508,
"step": 380
},
{
"dpo_loss": 0.5473071932792664,
"epoch": 2.182333490788852,
"grad_norm": 7.1971925561639445,
"learning_rate": 3.4567085809127247e-06,
"logits": -0.8520491123199463,
"logps": -77.47991180419922,
"loss": 0.1389,
"objective": 0.13993406295776367,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6583333611488342,
"regularize": 0.13993406295776367,
"step": 385
},
{
"dpo_loss": 0.538723349571228,
"epoch": 2.21067548417572,
"grad_norm": 7.027817565044039,
"learning_rate": 3.410714505454486e-06,
"logits": -0.80887770652771,
"logps": -76.32317352294922,
"loss": 0.1308,
"objective": 0.12663429975509644,
"ranking_idealized": 0.6041666865348816,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6000000238418579,
"regularize": 0.12663428485393524,
"step": 390
},
{
"dpo_loss": 0.5224890112876892,
"epoch": 2.2390174775625886,
"grad_norm": 6.949068306199585,
"learning_rate": 3.364362203744777e-06,
"logits": -0.8549118638038635,
"logps": -76.35110473632812,
"loss": 0.1437,
"objective": 0.14411191642284393,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.675000011920929,
"regularize": 0.14411191642284393,
"step": 395
},
{
"dpo_loss": 0.5168216228485107,
"epoch": 2.2673594709494567,
"grad_norm": 7.191883909269667,
"learning_rate": 3.3176699082935546e-06,
"logits": -0.9377852082252502,
"logps": -77.43128204345703,
"loss": 0.1368,
"objective": 0.13501186668872833,
"ranking_idealized": 0.7583333253860474,
"ranking_idealized_expo": 0.5791666507720947,
"ranking_simple": 0.737500011920929,
"regularize": 0.13501186668872833,
"step": 400
},
{
"epoch": 2.2673594709494567,
"eval_dpo_loss": 0.6858804225921631,
"eval_logits": -1.0155284404754639,
"eval_logps": -80.20379638671875,
"eval_loss": 0.4407689571380615,
"eval_objective": 0.43782898783683777,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5320248007774353,
"eval_regularize": 0.43782898783683777,
"eval_runtime": 259.3137,
"eval_samples_per_second": 22.328,
"eval_steps_per_second": 0.933,
"step": 400
},
{
"dpo_loss": 0.537171483039856,
"epoch": 2.295701464336325,
"grad_norm": 7.007268345576302,
"learning_rate": 3.2706559853460818e-06,
"logits": -1.0055091381072998,
"logps": -74.74109649658203,
"loss": 0.1383,
"objective": 0.1392410695552826,
"ranking_idealized": 0.737500011920929,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.7083333134651184,
"regularize": 0.1392410695552826,
"step": 405
},
{
"dpo_loss": 0.5326074361801147,
"epoch": 2.324043457723193,
"grad_norm": 6.941825657006022,
"learning_rate": 3.2233389276586325e-06,
"logits": -0.8970204591751099,
"logps": -74.2923355102539,
"loss": 0.1297,
"objective": 0.12273009866476059,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6458333134651184,
"regularize": 0.12273009866476059,
"step": 410
},
{
"dpo_loss": 0.5270788073539734,
"epoch": 2.3523854511100613,
"grad_norm": 7.366817713585052,
"learning_rate": 3.1757373472244324e-06,
"logits": -0.8971990942955017,
"logps": -74.44580078125,
"loss": 0.1319,
"objective": 0.14558042585849762,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6333333253860474,
"regularize": 0.14558042585849762,
"step": 415
},
{
"dpo_loss": 0.5407485961914062,
"epoch": 2.3807274444969297,
"grad_norm": 7.016879855860433,
"learning_rate": 3.127869967952698e-06,
"logits": -0.8165015578269958,
"logps": -75.92127227783203,
"loss": 0.1297,
"objective": 0.13260656595230103,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.6333333253860474,
"regularize": 0.13260656595230103,
"step": 420
},
{
"dpo_loss": 0.526520848274231,
"epoch": 2.409069437883798,
"grad_norm": 6.605258428992394,
"learning_rate": 3.0797556183036582e-06,
"logits": -0.8272897601127625,
"logps": -74.90123748779297,
"loss": 0.1262,
"objective": 0.12788838148117065,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6499999761581421,
"regularize": 0.12788838148117065,
"step": 425
},
{
"dpo_loss": 0.5405741333961487,
"epoch": 2.4374114312706663,
"grad_norm": 6.6924189063588955,
"learning_rate": 3.0314132238824416e-06,
"logits": -0.8260743021965027,
"logps": -75.06442260742188,
"loss": 0.1266,
"objective": 0.11811564862728119,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6416666507720947,
"regularize": 0.11811564117670059,
"step": 430
},
{
"dpo_loss": 0.5238969922065735,
"epoch": 2.4657534246575343,
"grad_norm": 6.903355919273098,
"learning_rate": 2.9828617999947647e-06,
"logits": -0.8589097857475281,
"logps": -75.69316101074219,
"loss": 0.1249,
"objective": 0.12512782216072083,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6708333492279053,
"regularize": 0.12512782216072083,
"step": 435
},
{
"dpo_loss": 0.5445213317871094,
"epoch": 2.4940954180444024,
"grad_norm": 6.961032298685898,
"learning_rate": 2.9341204441673267e-06,
"logits": -0.7675038576126099,
"logps": -74.55623626708984,
"loss": 0.126,
"objective": 0.12659841775894165,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6208333373069763,
"regularize": 0.12659841775894165,
"step": 440
},
{
"dpo_loss": 0.5370126366615295,
"epoch": 2.5224374114312704,
"grad_norm": 6.94281741073779,
"learning_rate": 2.8852083286358647e-06,
"logits": -0.7942711710929871,
"logps": -71.90986633300781,
"loss": 0.1211,
"objective": 0.13307242095470428,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.675000011920929,
"regularize": 0.13307242095470428,
"step": 445
},
{
"dpo_loss": 0.5622422099113464,
"epoch": 2.550779404818139,
"grad_norm": 7.14501016601336,
"learning_rate": 2.8361446928038298e-06,
"logits": -0.8527530431747437,
"logps": -74.12069702148438,
"loss": 0.122,
"objective": 0.13083526492118835,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.612500011920929,
"regularize": 0.13083526492118835,
"step": 450
},
{
"epoch": 2.550779404818139,
"eval_dpo_loss": 0.6862838268280029,
"eval_logits": -0.8946070671081543,
"eval_logps": -78.42880249023438,
"eval_loss": 0.44150272011756897,
"eval_objective": 0.44035181403160095,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5258264541625977,
"eval_regularize": 0.44035181403160095,
"eval_runtime": 258.0077,
"eval_samples_per_second": 22.441,
"eval_steps_per_second": 0.938,
"step": 450
},
{
"dpo_loss": 0.5327574610710144,
"epoch": 2.579121398205007,
"grad_norm": 7.160178764520009,
"learning_rate": 2.7869488356746344e-06,
"logits": -0.8465222716331482,
"logps": -74.07278442382812,
"loss": 0.1216,
"objective": 0.1370854526758194,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6625000238418579,
"regularize": 0.1370854526758194,
"step": 455
},
{
"dpo_loss": 0.5250583291053772,
"epoch": 2.6074633915918755,
"grad_norm": 7.146508258603712,
"learning_rate": 2.7376401082604563e-06,
"logits": -0.8942106366157532,
"logps": -74.29560852050781,
"loss": 0.1165,
"objective": 0.12135622650384903,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6666666865348816,
"regularize": 0.12135622650384903,
"step": 460
},
{
"dpo_loss": 0.530348539352417,
"epoch": 2.6358053849787435,
"grad_norm": 7.184783585057386,
"learning_rate": 2.6882379059705953e-06,
"logits": -0.7349064350128174,
"logps": -73.84181213378906,
"loss": 0.1204,
"objective": 0.1292448341846466,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6208333373069763,
"regularize": 0.1292448341846466,
"step": 465
},
{
"dpo_loss": 0.5289201140403748,
"epoch": 2.6641473783656116,
"grad_norm": 7.1334661921422216,
"learning_rate": 2.6387616609823506e-06,
"logits": -0.7510494589805603,
"logps": -73.42973327636719,
"loss": 0.1121,
"objective": 0.10491514950990677,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6541666388511658,
"regularize": 0.10491514950990677,
"step": 470
},
{
"dpo_loss": 0.5439261794090271,
"epoch": 2.69248937175248,
"grad_norm": 7.044559545822627,
"learning_rate": 2.5892308345974517e-06,
"logits": -0.7502660155296326,
"logps": -73.04237365722656,
"loss": 0.1114,
"objective": 0.11930320411920547,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6583333611488342,
"regularize": 0.11930320411920547,
"step": 475
},
{
"dpo_loss": 0.528633177280426,
"epoch": 2.720831365139348,
"grad_norm": 7.216679182755541,
"learning_rate": 2.53966490958702e-06,
"logits": -0.832193911075592,
"logps": -73.19383239746094,
"loss": 0.1104,
"objective": 0.10962951928377151,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6916666626930237,
"regularize": 0.10962951928377151,
"step": 480
},
{
"dpo_loss": 0.5266162753105164,
"epoch": 2.7491733585262166,
"grad_norm": 6.942552990674209,
"learning_rate": 2.490083382528097e-06,
"logits": -0.780593752861023,
"logps": -75.74951171875,
"loss": 0.1126,
"objective": 0.10767225921154022,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.675000011920929,
"regularize": 0.10767225176095963,
"step": 485
},
{
"dpo_loss": 0.5204401016235352,
"epoch": 2.7775153519130846,
"grad_norm": 6.79511973951677,
"learning_rate": 2.440505756134732e-06,
"logits": -0.7421233654022217,
"logps": -74.27189636230469,
"loss": 0.1117,
"objective": 0.11147340387105942,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.4749999940395355,
"ranking_simple": 0.6833333373069763,
"regularize": 0.11147340387105942,
"step": 490
},
{
"dpo_loss": 0.5330770015716553,
"epoch": 2.8058573452999527,
"grad_norm": 7.282976503781254,
"learning_rate": 2.3909515315866606e-06,
"logits": -0.7717820405960083,
"logps": -72.36864471435547,
"loss": 0.1034,
"objective": 0.10382074862718582,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4833333194255829,
"ranking_simple": 0.6333333253860474,
"regularize": 0.10382074862718582,
"step": 495
},
{
"dpo_loss": 0.5220891833305359,
"epoch": 2.8341993386868207,
"grad_norm": 7.004605377489112,
"learning_rate": 2.341440200858589e-06,
"logits": -0.822429895401001,
"logps": -71.28691864013672,
"loss": 0.1063,
"objective": 0.11509209126234055,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6666666865348816,
"regularize": 0.11509209126234055,
"step": 500
},
{
"epoch": 2.8341993386868207,
"eval_dpo_loss": 0.6860550045967102,
"eval_logits": -0.8682713508605957,
"eval_logps": -78.1278076171875,
"eval_loss": 0.44106075167655945,
"eval_objective": 0.43840065598487854,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5299586653709412,
"eval_regularize": 0.43840065598487854,
"eval_runtime": 259.0969,
"eval_samples_per_second": 22.347,
"eval_steps_per_second": 0.934,
"step": 500
},
{
"dpo_loss": 0.5273423790931702,
"epoch": 2.862541332073689,
"grad_norm": 6.758921464396605,
"learning_rate": 2.2919912390530945e-06,
"logits": -0.7519776225090027,
"logps": -72.76760864257812,
"loss": 0.1035,
"objective": 0.10023737698793411,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6833333373069763,
"regularize": 0.10023736953735352,
"step": 505
},
{
"dpo_loss": 0.5383204817771912,
"epoch": 2.8908833254605573,
"grad_norm": 6.749917335321248,
"learning_rate": 2.242624096740164e-06,
"logits": -0.7136736512184143,
"logps": -73.48322296142578,
"loss": 0.1008,
"objective": 0.10292276740074158,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.675000011920929,
"regularize": 0.10292276740074158,
"step": 510
},
{
"dpo_loss": 0.5426943302154541,
"epoch": 2.9192253188474258,
"grad_norm": 6.80108683493094,
"learning_rate": 2.193358192306384e-06,
"logits": -0.844546914100647,
"logps": -72.76075744628906,
"loss": 0.1039,
"objective": 0.10740550607442856,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.6583333611488342,
"regularize": 0.10740550607442856,
"step": 515
},
{
"dpo_loss": 0.5363429188728333,
"epoch": 2.947567312234294,
"grad_norm": 7.22597227685905,
"learning_rate": 2.1442129043167877e-06,
"logits": -0.7738971710205078,
"logps": -74.9779281616211,
"loss": 0.102,
"objective": 0.11110852658748627,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6458333134651184,
"regularize": 0.11110852658748627,
"step": 520
},
{
"dpo_loss": 0.5326921343803406,
"epoch": 2.975909305621162,
"grad_norm": 7.020529204895981,
"learning_rate": 2.0952075638923656e-06,
"logits": -0.773563027381897,
"logps": -73.9379653930664,
"loss": 0.0967,
"objective": 0.1049317866563797,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5708333253860474,
"ranking_simple": 0.699999988079071,
"regularize": 0.1049317866563797,
"step": 525
},
{
"dpo_loss": 0.5151563286781311,
"epoch": 3.0042512990080303,
"grad_norm": 6.663633146942429,
"learning_rate": 2.046361447106244e-06,
"logits": -0.7806794047355652,
"logps": -73.31244659423828,
"loss": 0.0987,
"objective": 0.10096151381731033,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6708333492279053,
"regularize": 0.10096149891614914,
"step": 530
},
{
"dpo_loss": 0.5298264622688293,
"epoch": 3.0325932923948984,
"grad_norm": 6.838429513652437,
"learning_rate": 1.997693767401503e-06,
"logits": -0.8067893981933594,
"logps": -75.07320404052734,
"loss": 0.0914,
"objective": 0.09071025252342224,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6708333492279053,
"regularize": 0.09071025252342224,
"step": 535
},
{
"dpo_loss": 0.5308666825294495,
"epoch": 3.0609352857817664,
"grad_norm": 6.786419151482898,
"learning_rate": 1.9492236680336486e-06,
"logits": -0.8495451807975769,
"logps": -72.48417663574219,
"loss": 0.0869,
"objective": 0.08104575425386429,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6458333134651184,
"regularize": 0.0810457393527031,
"step": 540
},
{
"dpo_loss": 0.5390760898590088,
"epoch": 3.089277279168635,
"grad_norm": 6.715619451066239,
"learning_rate": 1.9009702145406728e-06,
"logits": -0.7783963084220886,
"logps": -73.88996887207031,
"loss": 0.0873,
"objective": 0.08420184254646301,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.49166667461395264,
"ranking_simple": 0.6208333373069763,
"regularize": 0.08420184254646301,
"step": 545
},
{
"dpo_loss": 0.5265496373176575,
"epoch": 3.117619272555503,
"grad_norm": 6.799228974819442,
"learning_rate": 1.852952387243698e-06,
"logits": -0.6418666243553162,
"logps": -73.9046401977539,
"loss": 0.0878,
"objective": 0.09073540568351746,
"ranking_idealized": 0.699999988079071,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.7041666507720947,
"regularize": 0.09073540568351746,
"step": 550
},
{
"epoch": 3.117619272555503,
"eval_dpo_loss": 0.6847736239433289,
"eval_logits": -0.8292139172554016,
"eval_logps": -77.63910675048828,
"eval_loss": 0.44063544273376465,
"eval_objective": 0.43784743547439575,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5330578684806824,
"eval_regularize": 0.43784743547439575,
"eval_runtime": 259.006,
"eval_samples_per_second": 22.355,
"eval_steps_per_second": 0.934,
"step": 550
},
{
"dpo_loss": 0.5151117444038391,
"epoch": 3.1459612659423715,
"grad_norm": 7.0486870886771955,
"learning_rate": 1.8051890737811395e-06,
"logits": -0.6810140013694763,
"logps": -74.01383972167969,
"loss": 0.0855,
"objective": 0.08012814819812775,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6833333373069763,
"regularize": 0.08012814074754715,
"step": 555
},
{
"dpo_loss": 0.5303942561149597,
"epoch": 3.1743032593292395,
"grad_norm": 6.686572075133422,
"learning_rate": 1.7576990616793139e-06,
"logits": -0.7435484528541565,
"logps": -70.75598907470703,
"loss": 0.0855,
"objective": 0.07928713411092758,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6416666507720947,
"regularize": 0.07928712666034698,
"step": 560
},
{
"dpo_loss": 0.5257388353347778,
"epoch": 3.2026452527161076,
"grad_norm": 7.035317567433199,
"learning_rate": 1.7105010309624381e-06,
"logits": -0.7857434153556824,
"logps": -71.99687194824219,
"loss": 0.0807,
"objective": 0.07652737945318222,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6833333373069763,
"regularize": 0.07652737945318222,
"step": 565
},
{
"dpo_loss": 0.5218066573143005,
"epoch": 3.230987246102976,
"grad_norm": 6.567722467858525,
"learning_rate": 1.6636135468049122e-06,
"logits": -0.7239015698432922,
"logps": -72.04088592529297,
"loss": 0.0814,
"objective": 0.07984793186187744,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.6291666626930237,
"regularize": 0.07984793186187744,
"step": 570
},
{
"dpo_loss": 0.5289373397827148,
"epoch": 3.259329239489844,
"grad_norm": 6.699684239267555,
"learning_rate": 1.617055052228768e-06,
"logits": -0.7765447497367859,
"logps": -72.74471282958984,
"loss": 0.0823,
"objective": 0.08229862153530121,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6958333253860474,
"regularize": 0.08229862153530121,
"step": 575
},
{
"dpo_loss": 0.5433183908462524,
"epoch": 3.287671232876712,
"grad_norm": 6.701807007044451,
"learning_rate": 1.5708438608491816e-06,
"logits": -0.7891409397125244,
"logps": -73.1654052734375,
"loss": 0.0794,
"objective": 0.07175194472074509,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.6499999761581421,
"regularize": 0.0717519223690033,
"step": 580
},
{
"dpo_loss": 0.5400884747505188,
"epoch": 3.3160132262635806,
"grad_norm": 7.671988032753608,
"learning_rate": 1.524998149670871e-06,
"logits": -0.817208468914032,
"logps": -74.3894271850586,
"loss": 0.0807,
"objective": 0.0763852447271347,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6958333253860474,
"regularize": 0.0763852447271347,
"step": 585
},
{
"dpo_loss": 0.539016604423523,
"epoch": 3.3443552196504487,
"grad_norm": 7.045114972527912,
"learning_rate": 1.479535951938243e-06,
"logits": -0.8479073643684387,
"logps": -74.54483795166016,
"loss": 0.0782,
"objective": 0.07480078190565109,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6708333492279053,
"regularize": 0.07480078190565109,
"step": 590
},
{
"dpo_loss": 0.5274596810340881,
"epoch": 3.372697213037317,
"grad_norm": 6.813876249894319,
"learning_rate": 1.43447515004208e-06,
"logits": -0.7586421966552734,
"logps": -73.8796615600586,
"loss": 0.0725,
"objective": 0.06939196586608887,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6499999761581421,
"regularize": 0.06939195841550827,
"step": 595
},
{
"dpo_loss": 0.507723331451416,
"epoch": 3.4010392064241852,
"grad_norm": 6.843883025807739,
"learning_rate": 1.3898334684855647e-06,
"logits": -0.7954932451248169,
"logps": -73.50051879882812,
"loss": 0.0719,
"objective": 0.0723666176199913,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6875,
"regularize": 0.0723666176199913,
"step": 600
},
{
"epoch": 3.4010392064241852,
"eval_dpo_loss": 0.6851052045822144,
"eval_logits": -0.8875375390052795,
"eval_logps": -77.49230194091797,
"eval_loss": 0.439556360244751,
"eval_objective": 0.43727535009384155,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5309917330741882,
"eval_regularize": 0.43727535009384155,
"eval_runtime": 260.0756,
"eval_samples_per_second": 22.263,
"eval_steps_per_second": 0.93,
"step": 600
},
{
"dpo_loss": 0.539269745349884,
"epoch": 3.4293811998110533,
"grad_norm": 6.836932990419662,
"learning_rate": 1.3456284669124159e-06,
"logits": -0.8135491013526917,
"logps": -75.25065612792969,
"loss": 0.072,
"objective": 0.06646443903446198,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6708333492279053,
"regularize": 0.06646443903446198,
"step": 605
},
{
"dpo_loss": 0.532633364200592,
"epoch": 3.4577231931979218,
"grad_norm": 6.918313902636074,
"learning_rate": 1.301877533199859e-06,
"logits": -0.812556266784668,
"logps": -73.47212219238281,
"loss": 0.0726,
"objective": 0.06124640628695488,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.675000011920929,
"regularize": 0.06124639883637428,
"step": 610
},
{
"dpo_loss": 0.522627055644989,
"epoch": 3.48606518658479,
"grad_norm": 6.525966688786376,
"learning_rate": 1.2585978766191726e-06,
"logits": -0.8209077715873718,
"logps": -74.44559478759766,
"loss": 0.0694,
"objective": 0.0690179094672203,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.47083333134651184,
"ranking_simple": 0.6166666746139526,
"regularize": 0.0690179094672203,
"step": 615
},
{
"dpo_loss": 0.5149637460708618,
"epoch": 3.514407179971658,
"grad_norm": 6.444084091552882,
"learning_rate": 1.2158065210664848e-06,
"logits": -0.734274685382843,
"logps": -73.22693634033203,
"loss": 0.0739,
"objective": 0.06696704030036926,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6333333253860474,
"regularize": 0.06696703284978867,
"step": 620
},
{
"dpo_loss": 0.5303381085395813,
"epoch": 3.5427491733585263,
"grad_norm": 7.253457560302881,
"learning_rate": 1.1735202983664803e-06,
"logits": -0.763002336025238,
"logps": -71.75556182861328,
"loss": 0.0684,
"objective": 0.06924500316381454,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.675000011920929,
"regularize": 0.06924500316381454,
"step": 625
},
{
"dpo_loss": 0.5344926118850708,
"epoch": 3.5710911667453944,
"grad_norm": 7.160250572063496,
"learning_rate": 1.1317558416516696e-06,
"logits": -0.8248269557952881,
"logps": -72.7044677734375,
"loss": 0.0685,
"objective": 0.06329541653394699,
"ranking_idealized": 0.6333333253860474,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6416666507720947,
"regularize": 0.0632954090833664,
"step": 630
},
{
"dpo_loss": 0.5331679582595825,
"epoch": 3.5994331601322624,
"grad_norm": 6.5609855806488575,
"learning_rate": 1.0905295788197993e-06,
"logits": -0.7775312066078186,
"logps": -72.57146453857422,
"loss": 0.0645,
"objective": 0.06953860074281693,
"ranking_idealized": 0.637499988079071,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6208333373069763,
"regularize": 0.06953860074281693,
"step": 635
},
{
"dpo_loss": 0.5081437826156616,
"epoch": 3.627775153519131,
"grad_norm": 6.931309552817511,
"learning_rate": 1.049857726072005e-06,
"logits": -0.7663463354110718,
"logps": -74.27519989013672,
"loss": 0.0641,
"objective": 0.06706919521093369,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.6458333134651184,
"regularize": 0.06706918776035309,
"step": 640
},
{
"dpo_loss": 0.5230153799057007,
"epoch": 3.656117146905999,
"grad_norm": 6.607222677286154,
"learning_rate": 1.0097562815342215e-06,
"logits": -0.7920152544975281,
"logps": -71.90898132324219,
"loss": 0.0644,
"objective": 0.06532428413629532,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5249999761581421,
"ranking_simple": 0.6583333611488342,
"regularize": 0.06532428413629532,
"step": 645
},
{
"dpo_loss": 0.5418646335601807,
"epoch": 3.6844591402928675,
"grad_norm": 6.694188599643782,
"learning_rate": 9.702410189643838e-07,
"logits": -0.8044089674949646,
"logps": -72.86306762695312,
"loss": 0.0618,
"objective": 0.05348058044910431,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6499999761581421,
"regularize": 0.05348057672381401,
"step": 650
},
{
"epoch": 3.6844591402928675,
"eval_dpo_loss": 0.6855461001396179,
"eval_logits": -0.9103026986122131,
"eval_logps": -77.18383026123047,
"eval_loss": 0.4394647479057312,
"eval_objective": 0.43862253427505493,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5268595218658447,
"eval_regularize": 0.43862253427505493,
"eval_runtime": 259.3939,
"eval_samples_per_second": 22.321,
"eval_steps_per_second": 0.933,
"step": 650
},
{
"dpo_loss": 0.5326651930809021,
"epoch": 3.7128011336797355,
"grad_norm": 6.747215671656663,
"learning_rate": 9.313274815478698e-07,
"logits": -0.8151116967201233,
"logps": -72.32289123535156,
"loss": 0.0682,
"objective": 0.06774523854255676,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6666666865348816,
"regularize": 0.06774523109197617,
"step": 655
},
{
"dpo_loss": 0.5296677947044373,
"epoch": 3.7411431270666036,
"grad_norm": 6.603209874734441,
"learning_rate": 8.930309757836517e-07,
"logits": -0.8544061779975891,
"logps": -72.76691436767578,
"loss": 0.0605,
"objective": 0.06562329083681107,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6958333253860474,
"regularize": 0.06562329083681107,
"step": 660
},
{
"dpo_loss": 0.5254350900650024,
"epoch": 3.769485120453472,
"grad_norm": 6.7420568501361,
"learning_rate": 8.553665654635343e-07,
"logits": -0.7711302042007446,
"logps": -72.90251159667969,
"loss": 0.0589,
"objective": 0.05725221708416939,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6791666746139526,
"regularize": 0.057252202183008194,
"step": 665
},
{
"dpo_loss": 0.529458224773407,
"epoch": 3.79782711384034,
"grad_norm": 6.596226604829734,
"learning_rate": 8.183490657468687e-07,
"logits": -0.874411940574646,
"logps": -73.00768280029297,
"loss": 0.0592,
"objective": 0.0637064203619957,
"ranking_idealized": 0.7333333492279053,
"ranking_idealized_expo": 0.5958333611488342,
"ranking_simple": 0.7333333492279053,
"regularize": 0.0637064203619957,
"step": 670
},
{
"dpo_loss": 0.5332936644554138,
"epoch": 3.826169107227208,
"grad_norm": 6.620708805759934,
"learning_rate": 7.819930373330669e-07,
"logits": -0.8352341055870056,
"logps": -72.00785827636719,
"loss": 0.0556,
"objective": 0.05346338450908661,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.6666666865348816,
"regularize": 0.05346338450908661,
"step": 675
},
{
"dpo_loss": 0.5433677434921265,
"epoch": 3.8545111006140766,
"grad_norm": 6.502071174130889,
"learning_rate": 7.463127807341966e-07,
"logits": -0.7840080261230469,
"logps": -72.76580047607422,
"loss": 0.0571,
"objective": 0.05716581270098686,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6416666507720947,
"regularize": 0.057165808975696564,
"step": 680
},
{
"dpo_loss": 0.5243024230003357,
"epoch": 3.8828530940009447,
"grad_norm": 6.827039590730102,
"learning_rate": 7.113223306499336e-07,
"logits": -0.8465909361839294,
"logps": -72.07350158691406,
"loss": 0.0572,
"objective": 0.053640857338905334,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6791666746139526,
"regularize": 0.05364084988832474,
"step": 685
},
{
"dpo_loss": 0.5204752087593079,
"epoch": 3.9111950873878127,
"grad_norm": 6.658562071017675,
"learning_rate": 6.770354504470575e-07,
"logits": -0.8739193677902222,
"logps": -71.3963394165039,
"loss": 0.0561,
"objective": 0.054595671594142914,
"ranking_idealized": 0.7041666507720947,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6958333253860474,
"regularize": 0.05459566414356232,
"step": 690
},
{
"dpo_loss": 0.5284795761108398,
"epoch": 3.9395370807746812,
"grad_norm": 6.480093534880764,
"learning_rate": 6.434656267456843e-07,
"logits": -0.8127073049545288,
"logps": -72.72264862060547,
"loss": 0.0567,
"objective": 0.06189308688044548,
"ranking_idealized": 0.6083333492279053,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.6083333492279053,
"regularize": 0.06189308688044548,
"step": 695
},
{
"dpo_loss": 0.5350829362869263,
"epoch": 3.9678790741615493,
"grad_norm": 6.7060271282278245,
"learning_rate": 6.106260641143547e-07,
"logits": -0.833633303642273,
"logps": -73.22267150878906,
"loss": 0.0551,
"objective": 0.05491795390844345,
"ranking_idealized": 0.612500011920929,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6166666746139526,
"regularize": 0.05491795018315315,
"step": 700
},
{
"epoch": 3.9678790741615493,
"eval_dpo_loss": 0.6859015226364136,
"eval_logits": -0.9137452244758606,
"eval_logps": -77.72093200683594,
"eval_loss": 0.440186470746994,
"eval_objective": 0.43875548243522644,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5289255976676941,
"eval_regularize": 0.43875548243522644,
"eval_runtime": 259.3718,
"eval_samples_per_second": 22.323,
"eval_steps_per_second": 0.933,
"step": 700
},
{
"dpo_loss": 0.5229139924049377,
"epoch": 3.9962210675484178,
"grad_norm": 6.704821856039977,
"learning_rate": 5.785296798760601e-07,
"logits": -0.7782571911811829,
"logps": -72.21749877929688,
"loss": 0.0552,
"objective": 0.048626501113176346,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6666666865348816,
"regularize": 0.04862649738788605,
"step": 705
},
{
"dpo_loss": 0.5162150263786316,
"epoch": 4.024563060935286,
"grad_norm": 7.292565223355315,
"learning_rate": 5.471890990272666e-07,
"logits": -0.814688503742218,
"logps": -73.78315734863281,
"loss": 0.0446,
"objective": 0.045689478516578674,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.4791666567325592,
"ranking_simple": 0.675000011920929,
"regularize": 0.04568947106599808,
"step": 710
},
{
"dpo_loss": 0.5273666977882385,
"epoch": 4.052905054322154,
"grad_norm": 6.71901500064993,
"learning_rate": 5.166166492719124e-07,
"logits": -0.7857570648193359,
"logps": -72.7701187133789,
"loss": 0.0422,
"objective": 0.04041726142168045,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5416666865348816,
"ranking_simple": 0.675000011920929,
"regularize": 0.04041723534464836,
"step": 715
},
{
"dpo_loss": 0.5177367329597473,
"epoch": 4.081247047709022,
"grad_norm": 6.659794774093643,
"learning_rate": 4.868243561723535e-07,
"logits": -0.7260258793830872,
"logps": -75.34712219238281,
"loss": 0.0448,
"objective": 0.041196659207344055,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6583333611488342,
"regularize": 0.04119665548205376,
"step": 720
},
{
"dpo_loss": 0.5408446192741394,
"epoch": 4.109589041095891,
"grad_norm": 6.565784729673605,
"learning_rate": 4.57823938419153e-07,
"logits": -0.786972165107727,
"logps": -72.7632064819336,
"loss": 0.0445,
"objective": 0.04578384384512901,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.6541666388511658,
"regularize": 0.045783836394548416,
"step": 725
},
{
"dpo_loss": 0.5255261659622192,
"epoch": 4.137931034482759,
"grad_norm": 6.9190144367452975,
"learning_rate": 4.2962680322157335e-07,
"logits": -0.8668403625488281,
"logps": -72.80368041992188,
"loss": 0.0469,
"objective": 0.0492975153028965,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.675000011920929,
"regularize": 0.04929749667644501,
"step": 730
},
{
"dpo_loss": 0.5357745885848999,
"epoch": 4.166273027869627,
"grad_norm": 6.713218850119101,
"learning_rate": 4.0224404182059443e-07,
"logits": -0.7645056843757629,
"logps": -74.07119750976562,
"loss": 0.04,
"objective": 0.04291637986898422,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6791666746139526,
"regularize": 0.042916372418403625,
"step": 735
},
{
"dpo_loss": 0.5108519196510315,
"epoch": 4.194615021256495,
"grad_norm": 6.802036362247038,
"learning_rate": 3.756864251262143e-07,
"logits": -0.8028141260147095,
"logps": -73.28968048095703,
"loss": 0.0412,
"objective": 0.044031720608472824,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5208333134651184,
"ranking_simple": 0.6708333492279053,
"regularize": 0.04403171315789223,
"step": 740
},
{
"dpo_loss": 0.5228927731513977,
"epoch": 4.222957014643363,
"grad_norm": 6.653699462048885,
"learning_rate": 3.499643994807486e-07,
"logits": -0.8697967529296875,
"logps": -70.57527160644531,
"loss": 0.0428,
"objective": 0.04283791035413742,
"ranking_idealized": 0.6291666626930237,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6416666507720947,
"regularize": 0.042837902903556824,
"step": 745
},
{
"dpo_loss": 0.5164041519165039,
"epoch": 4.251299008030231,
"grad_norm": 6.911061908397223,
"learning_rate": 3.250880825498026e-07,
"logits": -0.9160488843917847,
"logps": -72.56361389160156,
"loss": 0.0388,
"objective": 0.03974687308073044,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6541666388511658,
"regularize": 0.03974686935544014,
"step": 750
},
{
"epoch": 4.251299008030231,
"eval_dpo_loss": 0.685931921005249,
"eval_logits": -0.8975909352302551,
"eval_logps": -77.07003021240234,
"eval_loss": 0.4403546452522278,
"eval_objective": 0.438612699508667,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5309917330741882,
"eval_regularize": 0.438612699508667,
"eval_runtime": 259.3145,
"eval_samples_per_second": 22.328,
"eval_steps_per_second": 0.933,
"step": 750
},
{
"dpo_loss": 0.5212615132331848,
"epoch": 4.2796410014171,
"grad_norm": 6.600495039472016,
"learning_rate": 3.0106725934252095e-07,
"logits": -0.8632883429527283,
"logps": -71.9403076171875,
"loss": 0.0401,
"objective": 0.039546623826026917,
"ranking_idealized": 0.6791666746139526,
"ranking_idealized_expo": 0.5333333611488342,
"ranking_simple": 0.6708333492279053,
"regularize": 0.03954662010073662,
"step": 755
},
{
"dpo_loss": 0.5251290798187256,
"epoch": 4.307982994803968,
"grad_norm": 6.496157144727595,
"learning_rate": 2.779113783626916e-07,
"logits": -0.8375190496444702,
"logps": -73.24321746826172,
"loss": 0.0397,
"objective": 0.03997815027832985,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5666666626930237,
"ranking_simple": 0.6958333253860474,
"regularize": 0.039978139102458954,
"step": 760
},
{
"dpo_loss": 0.5071607828140259,
"epoch": 4.336324988190836,
"grad_norm": 6.910390218786043,
"learning_rate": 2.5562954789221164e-07,
"logits": -0.8520547747612,
"logps": -73.03939056396484,
"loss": 0.0405,
"objective": 0.036770131438970566,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.7291666865348816,
"regularize": 0.03677012771368027,
"step": 765
},
{
"dpo_loss": 0.509007453918457,
"epoch": 4.364666981577704,
"grad_norm": 6.462624726733492,
"learning_rate": 2.3423053240837518e-07,
"logits": -0.7884809970855713,
"logps": -71.66094207763672,
"loss": 0.0396,
"objective": 0.03581638261675835,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.5083333253860474,
"ranking_simple": 0.6666666865348816,
"regularize": 0.03581637516617775,
"step": 770
},
{
"dpo_loss": 0.5204900503158569,
"epoch": 4.393008974964572,
"grad_norm": 6.9115021717677445,
"learning_rate": 2.137227491364016e-07,
"logits": -0.8254011869430542,
"logps": -72.81352233886719,
"loss": 0.0382,
"objective": 0.03518719598650932,
"ranking_idealized": 0.6666666865348816,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6583333611488342,
"regularize": 0.03518717736005783,
"step": 775
},
{
"dpo_loss": 0.5285161733627319,
"epoch": 4.42135096835144,
"grad_norm": 6.553973237097607,
"learning_rate": 1.941142647385469e-07,
"logits": -0.843854546546936,
"logps": -71.42961120605469,
"loss": 0.0393,
"objective": 0.03610792011022568,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.5041666626930237,
"ranking_simple": 0.6541666388511658,
"regularize": 0.03610791638493538,
"step": 780
},
{
"dpo_loss": 0.5424137115478516,
"epoch": 4.449692961738309,
"grad_norm": 6.303662663640985,
"learning_rate": 1.7541279214111277e-07,
"logits": -0.8690065741539001,
"logps": -71.59990692138672,
"loss": 0.0409,
"objective": 0.0452335849404335,
"ranking_idealized": 0.6583333611488342,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6416666507720947,
"regularize": 0.04523357003927231,
"step": 785
},
{
"dpo_loss": 0.5163858532905579,
"epoch": 4.478034955125177,
"grad_norm": 6.732149069961477,
"learning_rate": 1.5762568750059604e-07,
"logits": -0.8400804400444031,
"logps": -73.97518920898438,
"loss": 0.0369,
"objective": 0.042594779282808304,
"ranking_idealized": 0.7291666865348816,
"ranking_idealized_expo": 0.5583333373069763,
"ranking_simple": 0.7124999761581421,
"regularize": 0.04259476438164711,
"step": 790
},
{
"dpo_loss": 0.523327112197876,
"epoch": 4.506376948512045,
"grad_norm": 6.468243868538749,
"learning_rate": 1.4075994731016895e-07,
"logits": -0.7410406470298767,
"logps": -74.17599487304688,
"loss": 0.037,
"objective": 0.03256476670503616,
"ranking_idealized": 0.6541666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.6499999761581421,
"regularize": 0.03256473317742348,
"step": 795
},
{
"dpo_loss": 0.5217226147651672,
"epoch": 4.534718941898913,
"grad_norm": 6.810051199376682,
"learning_rate": 1.2482220564763669e-07,
"logits": -0.7353635430335999,
"logps": -72.93279266357422,
"loss": 0.0382,
"objective": 0.03855961933732033,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6916666626930237,
"regularize": 0.03855961188673973,
"step": 800
},
{
"epoch": 4.534718941898913,
"eval_dpo_loss": 0.6858941912651062,
"eval_logits": -0.8972411155700684,
"eval_logps": -77.24726867675781,
"eval_loss": 0.4401608407497406,
"eval_objective": 0.4384419023990631,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5320248007774353,
"eval_regularize": 0.4384419023990631,
"eval_runtime": 259.7863,
"eval_samples_per_second": 22.288,
"eval_steps_per_second": 0.932,
"step": 800
},
{
"dpo_loss": 0.5223442316055298,
"epoch": 4.563060935285781,
"grad_norm": 6.589373819397174,
"learning_rate": 1.0981873156594381e-07,
"logits": -0.8119328022003174,
"logps": -71.91402435302734,
"loss": 0.0371,
"objective": 0.03391700237989426,
"ranking_idealized": 0.6458333134651184,
"ranking_idealized_expo": 0.46666666865348816,
"ranking_simple": 0.6541666388511658,
"regularize": 0.03391699120402336,
"step": 805
},
{
"dpo_loss": 0.5284490585327148,
"epoch": 4.59140292867265,
"grad_norm": 6.877467315737168,
"learning_rate": 9.575542662726756e-08,
"logits": -0.8636207580566406,
"logps": -71.29000854492188,
"loss": 0.0362,
"objective": 0.034173477441072464,
"ranking_idealized": 0.5874999761581421,
"ranking_idealized_expo": 0.4625000059604645,
"ranking_simple": 0.6000000238418579,
"regularize": 0.034173473715782166,
"step": 810
},
{
"dpo_loss": 0.532176673412323,
"epoch": 4.619744922059518,
"grad_norm": 6.410472181963987,
"learning_rate": 8.26378225816582e-08,
"logits": -0.7376688122749329,
"logps": -72.42508697509766,
"loss": 0.0338,
"objective": 0.03282972797751427,
"ranking_idealized": 0.7250000238418579,
"ranking_idealized_expo": 0.5916666388511658,
"ranking_simple": 0.737500011920929,
"regularize": 0.03282969817519188,
"step": 815
},
{
"dpo_loss": 0.5355073809623718,
"epoch": 4.648086915446386,
"grad_norm": 6.840519860011787,
"learning_rate": 7.047107919114588e-08,
"logits": -0.8241658806800842,
"logps": -72.70903778076172,
"loss": 0.0329,
"objective": 0.029066000133752823,
"ranking_idealized": 0.6875,
"ranking_idealized_expo": 0.5625,
"ranking_simple": 0.6875,
"regularize": 0.029065988957881927,
"step": 820
},
{
"dpo_loss": 0.5363853573799133,
"epoch": 4.6764289088332545,
"grad_norm": 6.5036481206185615,
"learning_rate": 5.92599822001666e-08,
"logits": -0.7593883872032166,
"logps": -70.98949432373047,
"loss": 0.0362,
"objective": 0.03307075425982475,
"ranking_idealized": 0.6166666746139526,
"ranking_idealized_expo": 0.5,
"ranking_simple": 0.6041666865348816,
"regularize": 0.03307074308395386,
"step": 825
},
{
"dpo_loss": 0.5210588574409485,
"epoch": 4.7047709022201225,
"grad_norm": 6.54657496412036,
"learning_rate": 4.9008941453107527e-08,
"logits": -0.882694661617279,
"logps": -72.7275390625,
"loss": 0.0382,
"objective": 0.04189787432551384,
"ranking_idealized": 0.675000011920929,
"ranking_idealized_expo": 0.5166666507720947,
"ranking_simple": 0.6875,
"regularize": 0.04189785197377205,
"step": 830
},
{
"dpo_loss": 0.5095342993736267,
"epoch": 4.733112895606991,
"grad_norm": 6.620950665910206,
"learning_rate": 3.972198915970976e-08,
"logits": -0.8053682446479797,
"logps": -73.1785888671875,
"loss": 0.0356,
"objective": 0.03564568608999252,
"ranking_idealized": 0.6499999761581421,
"ranking_idealized_expo": 0.48750001192092896,
"ranking_simple": 0.6416666507720947,
"regularize": 0.03564564883708954,
"step": 835
},
{
"dpo_loss": 0.5004899501800537,
"epoch": 4.7614548889938595,
"grad_norm": 6.845348700884689,
"learning_rate": 3.1402778309014284e-08,
"logits": -0.8175690174102783,
"logps": -73.01248168945312,
"loss": 0.0343,
"objective": 0.030798058956861496,
"ranking_idealized": 0.6958333253860474,
"ranking_idealized_expo": 0.5541666746139526,
"ranking_simple": 0.6875,
"regularize": 0.03079797886312008,
"step": 840
},
{
"dpo_loss": 0.5335594415664673,
"epoch": 4.7897968823807275,
"grad_norm": 6.869470340595315,
"learning_rate": 2.4054581232470785e-08,
"logits": -0.823119044303894,
"logps": -72.64087677001953,
"loss": 0.0337,
"objective": 0.03280922397971153,
"ranking_idealized": 0.6416666507720947,
"ranking_idealized_expo": 0.4583333432674408,
"ranking_simple": 0.6416666507720947,
"regularize": 0.032809216529130936,
"step": 845
},
{
"dpo_loss": 0.5230380892753601,
"epoch": 4.818138875767596,
"grad_norm": 6.728592904487897,
"learning_rate": 1.768028831677926e-08,
"logits": -0.8133633732795715,
"logps": -71.71713256835938,
"loss": 0.032,
"objective": 0.028577150776982307,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6833333373069763,
"regularize": 0.028577139601111412,
"step": 850
},
{
"epoch": 4.818138875767596,
"eval_dpo_loss": 0.686019778251648,
"eval_logits": -0.8982982635498047,
"eval_logps": -77.20530700683594,
"eval_loss": 0.440197616815567,
"eval_objective": 0.4385392367839813,
"eval_ranking_idealized": 0.6570248007774353,
"eval_ranking_idealized_expo": 0.5113636255264282,
"eval_ranking_simple": 0.5320248007774353,
"eval_regularize": 0.4385392367839813,
"eval_runtime": 259.2486,
"eval_samples_per_second": 22.334,
"eval_steps_per_second": 0.933,
"step": 850
},
{
"dpo_loss": 0.533964216709137,
"epoch": 4.846480869154464,
"grad_norm": 6.460809600085337,
"learning_rate": 1.2282406866966078e-08,
"logits": -0.7880451679229736,
"logps": -72.03130340576172,
"loss": 0.033,
"objective": 0.03257838636636734,
"ranking_idealized": 0.7083333134651184,
"ranking_idealized_expo": 0.5375000238418579,
"ranking_simple": 0.7083333134651184,
"regularize": 0.032578371465206146,
"step": 855
},
{
"dpo_loss": 0.5133672952651978,
"epoch": 4.874822862541333,
"grad_norm": 6.904438097045362,
"learning_rate": 7.863060120144316e-09,
"logits": -0.7785756587982178,
"logps": -71.80149841308594,
"loss": 0.034,
"objective": 0.03182807192206383,
"ranking_idealized": 0.7166666388511658,
"ranking_idealized_expo": 0.550000011920929,
"ranking_simple": 0.7124999761581421,
"regularize": 0.031828057020902634,
"step": 860
},
{
"dpo_loss": 0.5214442014694214,
"epoch": 4.903164855928201,
"grad_norm": 6.578317256063193,
"learning_rate": 4.423986410346526e-09,
"logits": -0.8345638513565063,
"logps": -70.81376647949219,
"loss": 0.0332,
"objective": 0.037510212510824203,
"ranking_idealized": 0.6625000238418579,
"ranking_idealized_expo": 0.4958333373069763,
"ranking_simple": 0.6583333611488342,
"regularize": 0.03751020506024361,
"step": 865
},
{
"dpo_loss": 0.5174622535705566,
"epoch": 4.931506849315069,
"grad_norm": 6.93105315931173,
"learning_rate": 1.9665384847583622e-09,
"logits": -0.8424772024154663,
"logps": -72.85356903076172,
"loss": 0.0344,
"objective": 0.0343938022851944,
"ranking_idealized": 0.6916666626930237,
"ranking_idealized_expo": 0.512499988079071,
"ranking_simple": 0.6875,
"regularize": 0.034393779933452606,
"step": 870
},
{
"dpo_loss": 0.524601399898529,
"epoch": 4.959848842701937,
"grad_norm": 6.733993703356916,
"learning_rate": 4.916829716183901e-10,
"logits": -0.790997326374054,
"logps": -72.49830627441406,
"loss": 0.0325,
"objective": 0.03138989955186844,
"ranking_idealized": 0.6833333373069763,
"ranking_idealized_expo": 0.5458333492279053,
"ranking_simple": 0.675000011920929,
"regularize": 0.03138989210128784,
"step": 875
},
{
"dpo_loss": 0.5205584764480591,
"epoch": 4.988190836088805,
"grad_norm": 6.678618993937897,
"learning_rate": 0.0,
"logits": -0.8767089247703552,
"logps": -72.85419464111328,
"loss": 0.0328,
"objective": 0.030487608164548874,
"ranking_idealized": 0.6708333492279053,
"ranking_idealized_expo": 0.5291666388511658,
"ranking_simple": 0.6625000238418579,
"regularize": 0.030487585812807083,
"step": 880
},
{
"epoch": 4.988190836088805,
"step": 880,
"total_flos": 0.0,
"train_loss": 0.15004684163088147,
"train_runtime": 35354.4588,
"train_samples_per_second": 7.185,
"train_steps_per_second": 0.025
}
],
"logging_steps": 5,
"max_steps": 880,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}