|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.988190836088805, |
|
"eval_steps": 50, |
|
"global_step": 880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_loss": 0.6931471824645996, |
|
"epoch": 0.005668398677373642, |
|
"grad_norm": 13.413595204669061, |
|
"learning_rate": 5.681818181818182e-08, |
|
"logits": -1.3147305250167847, |
|
"logps": -88.0877456665039, |
|
"loss": 0.4113, |
|
"objective": 0.41588976979255676, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.5208333134651184, |
|
"regularize": 0.41588976979255676, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_loss": 0.6931002736091614, |
|
"epoch": 0.02834199338686821, |
|
"grad_norm": 13.427332141277578, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits": -1.3680692911148071, |
|
"logps": -84.26158905029297, |
|
"loss": 0.4129, |
|
"objective": 0.3754810094833374, |
|
"ranking_idealized": 0.6510416865348816, |
|
"ranking_idealized_expo": 0.5572916865348816, |
|
"ranking_simple": 0.546875, |
|
"regularize": 0.3754810094833374, |
|
"step": 5 |
|
}, |
|
{ |
|
"dpo_loss": 0.6900920867919922, |
|
"epoch": 0.05668398677373642, |
|
"grad_norm": 12.67240246603534, |
|
"learning_rate": 5.681818181818182e-07, |
|
"logits": -1.4469478130340576, |
|
"logps": -82.44185638427734, |
|
"loss": 0.4149, |
|
"objective": 0.43780091404914856, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.5166666507720947, |
|
"regularize": 0.43780091404914856, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_loss": 0.6891883015632629, |
|
"epoch": 0.08502598016060463, |
|
"grad_norm": 12.900443318480342, |
|
"learning_rate": 8.522727272727273e-07, |
|
"logits": -1.4273536205291748, |
|
"logps": -81.69231414794922, |
|
"loss": 0.419, |
|
"objective": 0.40471941232681274, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.512499988079071, |
|
"regularize": 0.40471941232681274, |
|
"step": 15 |
|
}, |
|
{ |
|
"dpo_loss": 0.6918003559112549, |
|
"epoch": 0.11336797354747284, |
|
"grad_norm": 14.313913436693964, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"logits": -1.437472939491272, |
|
"logps": -82.81884765625, |
|
"loss": 0.4037, |
|
"objective": 0.403365820646286, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.403365820646286, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_loss": 0.6768646836280823, |
|
"epoch": 0.14170996693434104, |
|
"grad_norm": 13.58368371280586, |
|
"learning_rate": 1.4204545454545458e-06, |
|
"logits": -1.5096564292907715, |
|
"logps": -82.65319061279297, |
|
"loss": 0.3931, |
|
"objective": 0.409546822309494, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.5041666626930237, |
|
"regularize": 0.409546822309494, |
|
"step": 25 |
|
}, |
|
{ |
|
"dpo_loss": 0.6711666584014893, |
|
"epoch": 0.17005196032120926, |
|
"grad_norm": 13.105526207742551, |
|
"learning_rate": 1.7045454545454546e-06, |
|
"logits": -1.5050361156463623, |
|
"logps": -83.46080780029297, |
|
"loss": 0.3841, |
|
"objective": 0.38422220945358276, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5666666626930237, |
|
"regularize": 0.38422220945358276, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_loss": 0.6604105234146118, |
|
"epoch": 0.19839395370807747, |
|
"grad_norm": 14.531223489181945, |
|
"learning_rate": 1.9886363636363638e-06, |
|
"logits": -1.5017287731170654, |
|
"logps": -84.02853393554688, |
|
"loss": 0.3722, |
|
"objective": 0.35262614488601685, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.5333333611488342, |
|
"regularize": 0.35262614488601685, |
|
"step": 35 |
|
}, |
|
{ |
|
"dpo_loss": 0.6500855684280396, |
|
"epoch": 0.22673594709494568, |
|
"grad_norm": 14.371511230761167, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"logits": -1.5274395942687988, |
|
"logps": -84.69414520263672, |
|
"loss": 0.379, |
|
"objective": 0.3910427689552307, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.5583333373069763, |
|
"regularize": 0.3910427689552307, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_loss": 0.639613151550293, |
|
"epoch": 0.25507794048181387, |
|
"grad_norm": 17.129866291690814, |
|
"learning_rate": 2.556818181818182e-06, |
|
"logits": -1.6906952857971191, |
|
"logps": -86.7696304321289, |
|
"loss": 0.3764, |
|
"objective": 0.4166857898235321, |
|
"ranking_idealized": 0.7208333611488342, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.4166857898235321, |
|
"step": 45 |
|
}, |
|
{ |
|
"dpo_loss": 0.637481153011322, |
|
"epoch": 0.2834199338686821, |
|
"grad_norm": 12.595130895032831, |
|
"learning_rate": 2.8409090909090916e-06, |
|
"logits": -1.604835033416748, |
|
"logps": -87.7275161743164, |
|
"loss": 0.3566, |
|
"objective": 0.3444138169288635, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.42500001192092896, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3444138169288635, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2834199338686821, |
|
"eval_dpo_loss": 0.6897606253623962, |
|
"eval_logits": -1.6203083992004395, |
|
"eval_logps": -96.4831314086914, |
|
"eval_loss": 0.413291871547699, |
|
"eval_objective": 0.4237224757671356, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5165289044380188, |
|
"eval_regularize": 0.4237224757671356, |
|
"eval_runtime": 259.6612, |
|
"eval_samples_per_second": 22.298, |
|
"eval_steps_per_second": 0.932, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_loss": 0.6459915041923523, |
|
"epoch": 0.3117619272555503, |
|
"grad_norm": 12.272365561333357, |
|
"learning_rate": 3.125e-06, |
|
"logits": -1.6675435304641724, |
|
"logps": -91.76095581054688, |
|
"loss": 0.3508, |
|
"objective": 0.359805166721344, |
|
"ranking_idealized": 0.7124999761581421, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.359805166721344, |
|
"step": 55 |
|
}, |
|
{ |
|
"dpo_loss": 0.6238431334495544, |
|
"epoch": 0.3401039206424185, |
|
"grad_norm": 13.401317383285344, |
|
"learning_rate": 3.409090909090909e-06, |
|
"logits": -1.6435742378234863, |
|
"logps": -88.34529113769531, |
|
"loss": 0.3357, |
|
"objective": 0.3332988917827606, |
|
"ranking_idealized": 0.7041666507720947, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.3332988917827606, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_loss": 0.6304014325141907, |
|
"epoch": 0.3684459140292867, |
|
"grad_norm": 12.713715258382189, |
|
"learning_rate": 3.6931818181818186e-06, |
|
"logits": -1.5630245208740234, |
|
"logps": -86.62226867675781, |
|
"loss": 0.3325, |
|
"objective": 0.3380378782749176, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3380378782749176, |
|
"step": 65 |
|
}, |
|
{ |
|
"dpo_loss": 0.627398669719696, |
|
"epoch": 0.39678790741615494, |
|
"grad_norm": 12.34517477378158, |
|
"learning_rate": 3.9772727272727275e-06, |
|
"logits": -1.519757628440857, |
|
"logps": -88.53465270996094, |
|
"loss": 0.3261, |
|
"objective": 0.3733421266078949, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.3733421266078949, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_loss": 0.618600070476532, |
|
"epoch": 0.42512990080302315, |
|
"grad_norm": 13.394680047264217, |
|
"learning_rate": 4.2613636363636365e-06, |
|
"logits": -1.444022297859192, |
|
"logps": -86.64772033691406, |
|
"loss": 0.3211, |
|
"objective": 0.3317987322807312, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.3317987322807312, |
|
"step": 75 |
|
}, |
|
{ |
|
"dpo_loss": 0.6121571660041809, |
|
"epoch": 0.45347189418989137, |
|
"grad_norm": 12.637476273331325, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"logits": -1.5524269342422485, |
|
"logps": -82.45478057861328, |
|
"loss": 0.3164, |
|
"objective": 0.33297327160835266, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.5833333134651184, |
|
"regularize": 0.33297327160835266, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_loss": 0.5934053659439087, |
|
"epoch": 0.4818138875767596, |
|
"grad_norm": 11.681739450816789, |
|
"learning_rate": 4.829545454545455e-06, |
|
"logits": -1.5023764371871948, |
|
"logps": -83.81694030761719, |
|
"loss": 0.3085, |
|
"objective": 0.2888755202293396, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.2888755202293396, |
|
"step": 85 |
|
}, |
|
{ |
|
"dpo_loss": 0.6050879955291748, |
|
"epoch": 0.5101558809636277, |
|
"grad_norm": 11.586500005637587, |
|
"learning_rate": 4.999921328558333e-06, |
|
"logits": -1.246580958366394, |
|
"logps": -88.33258819580078, |
|
"loss": 0.3085, |
|
"objective": 0.3082594871520996, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.5458333492279053, |
|
"regularize": 0.3082594871520996, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_loss": 0.5785138010978699, |
|
"epoch": 0.538497874350496, |
|
"grad_norm": 11.1585869550919, |
|
"learning_rate": 4.999036331701828e-06, |
|
"logits": -1.2436394691467285, |
|
"logps": -85.56443786621094, |
|
"loss": 0.3059, |
|
"objective": 0.293775349855423, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.293775349855423, |
|
"step": 95 |
|
}, |
|
{ |
|
"dpo_loss": 0.5692493319511414, |
|
"epoch": 0.5668398677373642, |
|
"grad_norm": 10.57766394132754, |
|
"learning_rate": 4.997168347957521e-06, |
|
"logits": -1.3053488731384277, |
|
"logps": -83.31777954101562, |
|
"loss": 0.3027, |
|
"objective": 0.31831109523773193, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.31831109523773193, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5668398677373642, |
|
"eval_dpo_loss": 0.686188817024231, |
|
"eval_logits": -1.3063371181488037, |
|
"eval_logps": -88.41004180908203, |
|
"eval_loss": 0.41417956352233887, |
|
"eval_objective": 0.41506800055503845, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5216942429542542, |
|
"eval_regularize": 0.41506800055503845, |
|
"eval_runtime": 259.1694, |
|
"eval_samples_per_second": 22.341, |
|
"eval_steps_per_second": 0.934, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_loss": 0.5781983733177185, |
|
"epoch": 0.5951818611242324, |
|
"grad_norm": 10.454612496016592, |
|
"learning_rate": 4.994318112090048e-06, |
|
"logits": -1.1954314708709717, |
|
"logps": -83.58454132080078, |
|
"loss": 0.3006, |
|
"objective": 0.31248193979263306, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.31248193979263306, |
|
"step": 105 |
|
}, |
|
{ |
|
"dpo_loss": 0.5739426016807556, |
|
"epoch": 0.6235238545111006, |
|
"grad_norm": 10.705962678396897, |
|
"learning_rate": 4.990486745229364e-06, |
|
"logits": -1.4220352172851562, |
|
"logps": -82.55160522460938, |
|
"loss": 0.3033, |
|
"objective": 0.31312334537506104, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.31312334537506104, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_loss": 0.5884331464767456, |
|
"epoch": 0.6518658478979689, |
|
"grad_norm": 10.243480321598604, |
|
"learning_rate": 4.985675754429744e-06, |
|
"logits": -1.5664387941360474, |
|
"logps": -80.2437515258789, |
|
"loss": 0.2914, |
|
"objective": 0.29273518919944763, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.29273518919944763, |
|
"step": 115 |
|
}, |
|
{ |
|
"dpo_loss": 0.5871846079826355, |
|
"epoch": 0.680207841284837, |
|
"grad_norm": 9.272918607237454, |
|
"learning_rate": 4.9798870320769884e-06, |
|
"logits": -1.5533816814422607, |
|
"logps": -77.25326538085938, |
|
"loss": 0.2962, |
|
"objective": 0.27850577235221863, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.27850577235221863, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_loss": 0.5849189162254333, |
|
"epoch": 0.7085498346717053, |
|
"grad_norm": 9.087139481090837, |
|
"learning_rate": 4.973122855144066e-06, |
|
"logits": -1.4378304481506348, |
|
"logps": -76.45697784423828, |
|
"loss": 0.286, |
|
"objective": 0.27717408537864685, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.27717408537864685, |
|
"step": 125 |
|
}, |
|
{ |
|
"dpo_loss": 0.5918333530426025, |
|
"epoch": 0.7368918280585735, |
|
"grad_norm": 9.798401176649499, |
|
"learning_rate": 4.965385884295467e-06, |
|
"logits": -1.5077089071273804, |
|
"logps": -75.78624725341797, |
|
"loss": 0.2914, |
|
"objective": 0.29082292318344116, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.4541666805744171, |
|
"ranking_simple": 0.5625, |
|
"regularize": 0.29082292318344116, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_loss": 0.5844586491584778, |
|
"epoch": 0.7652338214454416, |
|
"grad_norm": 9.388728292193363, |
|
"learning_rate": 4.956679162840646e-06, |
|
"logits": -1.4935728311538696, |
|
"logps": -77.08039093017578, |
|
"loss": 0.2751, |
|
"objective": 0.27092453837394714, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.27092453837394714, |
|
"step": 135 |
|
}, |
|
{ |
|
"dpo_loss": 0.5776726007461548, |
|
"epoch": 0.7935758148323099, |
|
"grad_norm": 9.368061718815978, |
|
"learning_rate": 4.947006115536947e-06, |
|
"logits": -1.344637393951416, |
|
"logps": -79.50406646728516, |
|
"loss": 0.278, |
|
"objective": 0.272522896528244, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.5708333253860474, |
|
"regularize": 0.272522896528244, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_loss": 0.583095908164978, |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 8.887316448589162, |
|
"learning_rate": 4.9363705472424825e-06, |
|
"logits": -1.260974645614624, |
|
"logps": -80.34443664550781, |
|
"loss": 0.2702, |
|
"objective": 0.27244552969932556, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.5791666507720947, |
|
"regularize": 0.27244552969932556, |
|
"step": 145 |
|
}, |
|
{ |
|
"dpo_loss": 0.5877144932746887, |
|
"epoch": 0.8502598016060463, |
|
"grad_norm": 8.678793910307522, |
|
"learning_rate": 4.924776641419513e-06, |
|
"logits": -1.168263554573059, |
|
"logps": -79.24138641357422, |
|
"loss": 0.2706, |
|
"objective": 0.26840564608573914, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.26840564608573914, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8502598016060463, |
|
"eval_dpo_loss": 0.6856931447982788, |
|
"eval_logits": -1.1981431245803833, |
|
"eval_logps": -87.367431640625, |
|
"eval_loss": 0.42615318298339844, |
|
"eval_objective": 0.42774394154548645, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5278925895690918, |
|
"eval_regularize": 0.42774394154548645, |
|
"eval_runtime": 258.5616, |
|
"eval_samples_per_second": 22.393, |
|
"eval_steps_per_second": 0.936, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_loss": 0.5757229924201965, |
|
"epoch": 0.8786017949929145, |
|
"grad_norm": 9.10049250548801, |
|
"learning_rate": 4.9122289584888926e-06, |
|
"logits": -1.1876070499420166, |
|
"logps": -79.34397888183594, |
|
"loss": 0.2634, |
|
"objective": 0.2684144973754883, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.2684144973754883, |
|
"step": 155 |
|
}, |
|
{ |
|
"dpo_loss": 0.5666177868843079, |
|
"epoch": 0.9069437883797827, |
|
"grad_norm": 8.820116531265606, |
|
"learning_rate": 4.8987324340362445e-06, |
|
"logits": -1.129431962966919, |
|
"logps": -79.80657196044922, |
|
"loss": 0.2548, |
|
"objective": 0.24889370799064636, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.24889370799064636, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_loss": 0.5674318671226501, |
|
"epoch": 0.9352857817666509, |
|
"grad_norm": 8.768643401715794, |
|
"learning_rate": 4.884292376870567e-06, |
|
"logits": -1.08602774143219, |
|
"logps": -80.07511138916016, |
|
"loss": 0.2569, |
|
"objective": 0.24699881672859192, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.24699881672859192, |
|
"step": 165 |
|
}, |
|
{ |
|
"dpo_loss": 0.5818125605583191, |
|
"epoch": 0.9636277751535192, |
|
"grad_norm": 8.727560270319945, |
|
"learning_rate": 4.868914466936038e-06, |
|
"logits": -1.1042813062667847, |
|
"logps": -81.02680206298828, |
|
"loss": 0.2644, |
|
"objective": 0.2629285454750061, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.2629285454750061, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_loss": 0.5690818428993225, |
|
"epoch": 0.9919697685403873, |
|
"grad_norm": 9.360612159540104, |
|
"learning_rate": 4.8526047530778175e-06, |
|
"logits": -1.0876260995864868, |
|
"logps": -81.20608520507812, |
|
"loss": 0.257, |
|
"objective": 0.2705513834953308, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.2705513834953308, |
|
"step": 175 |
|
}, |
|
{ |
|
"dpo_loss": 0.5392836332321167, |
|
"epoch": 1.0203117619272555, |
|
"grad_norm": 8.811963294729349, |
|
"learning_rate": 4.835369650662767e-06, |
|
"logits": -1.2233085632324219, |
|
"logps": -79.34477233886719, |
|
"loss": 0.2467, |
|
"objective": 0.25315728783607483, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.25315728783607483, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_loss": 0.5429127812385559, |
|
"epoch": 1.0486537553141237, |
|
"grad_norm": 7.990993827783984, |
|
"learning_rate": 4.817215939055984e-06, |
|
"logits": -1.0636595487594604, |
|
"logps": -77.5766830444336, |
|
"loss": 0.2395, |
|
"objective": 0.23463593423366547, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.23463593423366547, |
|
"step": 185 |
|
}, |
|
{ |
|
"dpo_loss": 0.5637180209159851, |
|
"epoch": 1.076995748700992, |
|
"grad_norm": 8.256999336661497, |
|
"learning_rate": 4.798150758954164e-06, |
|
"logits": -1.0749539136886597, |
|
"logps": -78.55563354492188, |
|
"loss": 0.2191, |
|
"objective": 0.21444876492023468, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.2144487500190735, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_loss": 0.5680408477783203, |
|
"epoch": 1.10533774208786, |
|
"grad_norm": 8.413792498503495, |
|
"learning_rate": 4.778181609576832e-06, |
|
"logits": -1.1007658243179321, |
|
"logps": -77.28519439697266, |
|
"loss": 0.2178, |
|
"objective": 0.21195411682128906, |
|
"ranking_idealized": 0.7041666507720947, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.21195411682128906, |
|
"step": 195 |
|
}, |
|
{ |
|
"dpo_loss": 0.5640944838523865, |
|
"epoch": 1.1336797354747283, |
|
"grad_norm": 7.655804437668192, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits": -1.2176551818847656, |
|
"logps": -76.59423828125, |
|
"loss": 0.2256, |
|
"objective": 0.2188282459974289, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.2188282459974289, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1336797354747283, |
|
"eval_dpo_loss": 0.6861998438835144, |
|
"eval_logits": -1.2023168802261353, |
|
"eval_logps": -81.81192016601562, |
|
"eval_loss": 0.43468645215034485, |
|
"eval_objective": 0.434445858001709, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5247933864593506, |
|
"eval_regularize": 0.434445858001709, |
|
"eval_runtime": 259.1196, |
|
"eval_samples_per_second": 22.345, |
|
"eval_steps_per_second": 0.934, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_loss": 0.5600470304489136, |
|
"epoch": 1.1620217288615966, |
|
"grad_norm": 7.695005611618307, |
|
"learning_rate": 4.735563174649278e-06, |
|
"logits": -1.1184250116348267, |
|
"logps": -77.62281799316406, |
|
"loss": 0.2243, |
|
"objective": 0.22593899071216583, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.22593899071216583, |
|
"step": 205 |
|
}, |
|
{ |
|
"dpo_loss": 0.5418952107429504, |
|
"epoch": 1.1903637222484649, |
|
"grad_norm": 7.898188189840939, |
|
"learning_rate": 4.7129306529060415e-06, |
|
"logits": -1.0059646368026733, |
|
"logps": -78.90467834472656, |
|
"loss": 0.2215, |
|
"objective": 0.21866732835769653, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.21866732835769653, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_loss": 0.5478299856185913, |
|
"epoch": 1.2187057156353331, |
|
"grad_norm": 8.602045809831973, |
|
"learning_rate": 4.68942768290728e-06, |
|
"logits": -0.9527910947799683, |
|
"logps": -78.7351303100586, |
|
"loss": 0.2113, |
|
"objective": 0.21119125187397003, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.21119123697280884, |
|
"step": 215 |
|
}, |
|
{ |
|
"dpo_loss": 0.5662825107574463, |
|
"epoch": 1.2470477090222012, |
|
"grad_norm": 8.077787041174624, |
|
"learning_rate": 4.665063509461098e-06, |
|
"logits": -0.8720958828926086, |
|
"logps": -76.756591796875, |
|
"loss": 0.2138, |
|
"objective": 0.20959888398647308, |
|
"ranking_idealized": 0.7208333611488342, |
|
"ranking_idealized_expo": 0.6041666865348816, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.20959888398647308, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_loss": 0.5461317300796509, |
|
"epoch": 1.2753897024090695, |
|
"grad_norm": 7.874404608360002, |
|
"learning_rate": 4.639847716126855e-06, |
|
"logits": -0.9673039317131042, |
|
"logps": -78.00302124023438, |
|
"loss": 0.2154, |
|
"objective": 0.20724421739578247, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.20724421739578247, |
|
"step": 225 |
|
}, |
|
{ |
|
"dpo_loss": 0.5674420595169067, |
|
"epoch": 1.3037316957959377, |
|
"grad_norm": 7.50398022531709, |
|
"learning_rate": 4.613790221445511e-06, |
|
"logits": -0.9144200682640076, |
|
"logps": -78.14189910888672, |
|
"loss": 0.2025, |
|
"objective": 0.21042712032794952, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.21042712032794952, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_loss": 0.5355426669120789, |
|
"epoch": 1.3320736891828058, |
|
"grad_norm": 7.978556347314524, |
|
"learning_rate": 4.586901275038201e-06, |
|
"logits": -1.0436056852340698, |
|
"logps": -76.17823791503906, |
|
"loss": 0.2072, |
|
"objective": 0.19967219233512878, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.19967219233512878, |
|
"step": 235 |
|
}, |
|
{ |
|
"dpo_loss": 0.5373556613922119, |
|
"epoch": 1.360415682569674, |
|
"grad_norm": 7.6373820167513635, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits": -0.9692198038101196, |
|
"logps": -78.42900085449219, |
|
"loss": 0.1972, |
|
"objective": 0.19282637536525726, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.19282637536525726, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_loss": 0.5545187592506409, |
|
"epoch": 1.3887576759565423, |
|
"grad_norm": 7.35404104460057, |
|
"learning_rate": 4.530671656612544e-06, |
|
"logits": -0.992374062538147, |
|
"logps": -76.8333969116211, |
|
"loss": 0.1965, |
|
"objective": 0.19396263360977173, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.19396263360977173, |
|
"step": 245 |
|
}, |
|
{ |
|
"dpo_loss": 0.536364734172821, |
|
"epoch": 1.4170996693434104, |
|
"grad_norm": 7.62812382511743, |
|
"learning_rate": 4.501353102310901e-06, |
|
"logits": -0.9773390293121338, |
|
"logps": -76.92967224121094, |
|
"loss": 0.2005, |
|
"objective": 0.2195345014333725, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.2195345014333725, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4170996693434104, |
|
"eval_dpo_loss": 0.6815471053123474, |
|
"eval_logits": -1.0615853071212769, |
|
"eval_logps": -81.82124328613281, |
|
"eval_loss": 0.42915773391723633, |
|
"eval_objective": 0.4288579821586609, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5227272510528564, |
|
"eval_regularize": 0.4288579821586609, |
|
"eval_runtime": 259.2538, |
|
"eval_samples_per_second": 22.333, |
|
"eval_steps_per_second": 0.933, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_loss": 0.5536395907402039, |
|
"epoch": 1.4454416627302786, |
|
"grad_norm": 7.608994157416838, |
|
"learning_rate": 4.4712473230167775e-06, |
|
"logits": -0.9322084784507751, |
|
"logps": -77.44235229492188, |
|
"loss": 0.1968, |
|
"objective": 0.1830952763557434, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.1830952763557434, |
|
"step": 255 |
|
}, |
|
{ |
|
"dpo_loss": 0.5560018420219421, |
|
"epoch": 1.473783656117147, |
|
"grad_norm": 7.9576485937673995, |
|
"learning_rate": 4.440366160729393e-06, |
|
"logits": -0.9907886385917664, |
|
"logps": -77.4918212890625, |
|
"loss": 0.197, |
|
"objective": 0.20965854823589325, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.20965854823589325, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_loss": 0.5228397250175476, |
|
"epoch": 1.5021256495040152, |
|
"grad_norm": 7.710193409626713, |
|
"learning_rate": 4.4087217624420595e-06, |
|
"logits": -0.9860392808914185, |
|
"logps": -76.1131362915039, |
|
"loss": 0.1993, |
|
"objective": 0.18088673055171967, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.18088671565055847, |
|
"step": 265 |
|
}, |
|
{ |
|
"dpo_loss": 0.5495325922966003, |
|
"epoch": 1.5304676428908834, |
|
"grad_norm": 7.266856200970056, |
|
"learning_rate": 4.376326575364206e-06, |
|
"logits": -0.9504061341285706, |
|
"logps": -77.44710540771484, |
|
"loss": 0.1911, |
|
"objective": 0.1995639055967331, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.1995639055967331, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_loss": 0.5499274730682373, |
|
"epoch": 1.5588096362777515, |
|
"grad_norm": 7.524550189383748, |
|
"learning_rate": 4.34319334202531e-06, |
|
"logits": -0.959584653377533, |
|
"logps": -77.12781524658203, |
|
"loss": 0.1924, |
|
"objective": 0.20593222975730896, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.20593222975730896, |
|
"step": 275 |
|
}, |
|
{ |
|
"dpo_loss": 0.5305771827697754, |
|
"epoch": 1.5871516296646198, |
|
"grad_norm": 7.773566185560746, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits": -0.855711042881012, |
|
"logps": -74.90408325195312, |
|
"loss": 0.1869, |
|
"objective": 0.18780963122844696, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.18780963122844696, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_loss": 0.5325539112091064, |
|
"epoch": 1.615493623051488, |
|
"grad_norm": 7.526664818437735, |
|
"learning_rate": 4.274765153095008e-06, |
|
"logits": -0.8781672120094299, |
|
"logps": -76.42524719238281, |
|
"loss": 0.1948, |
|
"objective": 0.18717069923877716, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.18717069923877716, |
|
"step": 285 |
|
}, |
|
{ |
|
"dpo_loss": 0.5575224757194519, |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 6.835237556478757, |
|
"learning_rate": 4.239497113483819e-06, |
|
"logits": -0.8322954773902893, |
|
"logps": -74.6546859741211, |
|
"loss": 0.1789, |
|
"objective": 0.182403564453125, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.182403564453125, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_loss": 0.5432038903236389, |
|
"epoch": 1.6721776098252243, |
|
"grad_norm": 6.961571934316881, |
|
"learning_rate": 4.203544848984729e-06, |
|
"logits": -0.8109145760536194, |
|
"logps": -72.75186920166016, |
|
"loss": 0.1859, |
|
"objective": 0.18214626610279083, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.18214626610279083, |
|
"step": 295 |
|
}, |
|
{ |
|
"dpo_loss": 0.5352488160133362, |
|
"epoch": 1.7005196032120926, |
|
"grad_norm": 7.403474665890531, |
|
"learning_rate": 4.16692250129073e-06, |
|
"logits": -0.9637666344642639, |
|
"logps": -74.78925323486328, |
|
"loss": 0.187, |
|
"objective": 0.20280833542346954, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.574999988079071, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.20280833542346954, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7005196032120926, |
|
"eval_dpo_loss": 0.6845287680625916, |
|
"eval_logits": -1.0398027896881104, |
|
"eval_logps": -80.0077133178711, |
|
"eval_loss": 0.43694496154785156, |
|
"eval_objective": 0.43617644906044006, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5258264541625977, |
|
"eval_regularize": 0.43617644906044006, |
|
"eval_runtime": 259.8028, |
|
"eval_samples_per_second": 22.286, |
|
"eval_steps_per_second": 0.931, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_loss": 0.5487022399902344, |
|
"epoch": 1.7288615965989607, |
|
"grad_norm": 7.543811016492746, |
|
"learning_rate": 4.129644475669617e-06, |
|
"logits": -0.9759048223495483, |
|
"logps": -74.88541412353516, |
|
"loss": 0.1848, |
|
"objective": 0.18824300169944763, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.625, |
|
"regularize": 0.18824300169944763, |
|
"step": 305 |
|
}, |
|
{ |
|
"dpo_loss": 0.5396175980567932, |
|
"epoch": 1.7572035899858292, |
|
"grad_norm": 7.45667126547559, |
|
"learning_rate": 4.091725435297721e-06, |
|
"logits": -1.014137625694275, |
|
"logps": -71.69686126708984, |
|
"loss": 0.187, |
|
"objective": 0.18091975152492523, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.18091975152492523, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_loss": 0.5542029142379761, |
|
"epoch": 1.7855455833726972, |
|
"grad_norm": 7.022791945824948, |
|
"learning_rate": 4.053180295492203e-06, |
|
"logits": -0.9052151441574097, |
|
"logps": -72.4874038696289, |
|
"loss": 0.1773, |
|
"objective": 0.17407093942165375, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.17407093942165375, |
|
"step": 315 |
|
}, |
|
{ |
|
"dpo_loss": 0.5506651401519775, |
|
"epoch": 1.8138875767595655, |
|
"grad_norm": 7.473980140791967, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits": -0.9277843832969666, |
|
"logps": -75.52190399169922, |
|
"loss": 0.1782, |
|
"objective": 0.19924014806747437, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.637499988079071, |
|
"regularize": 0.19924013316631317, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_loss": 0.5471735000610352, |
|
"epoch": 1.8422295701464337, |
|
"grad_norm": 7.500911105092317, |
|
"learning_rate": 3.974272604254906e-06, |
|
"logits": -0.9084165096282959, |
|
"logps": -77.74280548095703, |
|
"loss": 0.1791, |
|
"objective": 0.18182264268398285, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.18182264268398285, |
|
"step": 325 |
|
}, |
|
{ |
|
"dpo_loss": 0.5376237034797668, |
|
"epoch": 1.8705715635333018, |
|
"grad_norm": 6.956332723343249, |
|
"learning_rate": 3.933941090877615e-06, |
|
"logits": -0.6826075911521912, |
|
"logps": -74.37185668945312, |
|
"loss": 0.1748, |
|
"objective": 0.15574544668197632, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.15574544668197632, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_loss": 0.5471087694168091, |
|
"epoch": 1.89891355692017, |
|
"grad_norm": 7.004792858354732, |
|
"learning_rate": 3.893045541966975e-06, |
|
"logits": -0.8625032901763916, |
|
"logps": -73.06546020507812, |
|
"loss": 0.1666, |
|
"objective": 0.18181832134723663, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.18181832134723663, |
|
"step": 335 |
|
}, |
|
{ |
|
"dpo_loss": 0.5468536615371704, |
|
"epoch": 1.9272555503070383, |
|
"grad_norm": 7.263082545218723, |
|
"learning_rate": 3.8516020436389945e-06, |
|
"logits": -0.8647869825363159, |
|
"logps": -74.24337005615234, |
|
"loss": 0.1658, |
|
"objective": 0.16866040229797363, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.7166666388511658, |
|
"regularize": 0.16866040229797363, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_loss": 0.5612522959709167, |
|
"epoch": 1.9555975436939064, |
|
"grad_norm": 7.17931193885588, |
|
"learning_rate": 3.8096268975436045e-06, |
|
"logits": -0.9274277091026306, |
|
"logps": -72.64501953125, |
|
"loss": 0.1631, |
|
"objective": 0.1834598332643509, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.1834598332643509, |
|
"step": 345 |
|
}, |
|
{ |
|
"dpo_loss": 0.5486599802970886, |
|
"epoch": 1.9839395370807746, |
|
"grad_norm": 7.4574942962802675, |
|
"learning_rate": 3.767136614452458e-06, |
|
"logits": -0.8831450343132019, |
|
"logps": -75.68425750732422, |
|
"loss": 0.1664, |
|
"objective": 0.15772633254528046, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.15772633254528046, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.9839395370807746, |
|
"eval_dpo_loss": 0.6841984987258911, |
|
"eval_logits": -0.9982038140296936, |
|
"eval_logps": -79.63081359863281, |
|
"eval_loss": 0.4382030665874481, |
|
"eval_objective": 0.4358615577220917, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5289255976676941, |
|
"eval_regularize": 0.4358615577220917, |
|
"eval_runtime": 258.5329, |
|
"eval_samples_per_second": 22.396, |
|
"eval_steps_per_second": 0.936, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_loss": 0.5294139981269836, |
|
"epoch": 2.012281530467643, |
|
"grad_norm": 7.097987961305298, |
|
"learning_rate": 3.724147907764478e-06, |
|
"logits": -0.8353627324104309, |
|
"logps": -75.18488311767578, |
|
"loss": 0.1556, |
|
"objective": 0.14675287902355194, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.14675287902355194, |
|
"step": 355 |
|
}, |
|
{ |
|
"dpo_loss": 0.5278249979019165, |
|
"epoch": 2.040623523854511, |
|
"grad_norm": 7.097858061871919, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits": -0.8662230372428894, |
|
"logps": -74.37833404541016, |
|
"loss": 0.1484, |
|
"objective": 0.13966700434684753, |
|
"ranking_idealized": 0.7250000238418579, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.13966700434684753, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_loss": 0.5365410447120667, |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 7.018046135257267, |
|
"learning_rate": 3.6367430508080283e-06, |
|
"logits": -0.9098676443099976, |
|
"logps": -77.11681365966797, |
|
"loss": 0.1521, |
|
"objective": 0.14031733572483063, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.14031733572483063, |
|
"step": 365 |
|
}, |
|
{ |
|
"dpo_loss": 0.5375342965126038, |
|
"epoch": 2.0973075106282475, |
|
"grad_norm": 6.853083763524368, |
|
"learning_rate": 3.5923612809233987e-06, |
|
"logits": -0.8166040778160095, |
|
"logps": -74.46318817138672, |
|
"loss": 0.1422, |
|
"objective": 0.14259177446365356, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.14259177446365356, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_loss": 0.5261290669441223, |
|
"epoch": 2.1256495040151155, |
|
"grad_norm": 6.737212175634408, |
|
"learning_rate": 3.547549834686222e-06, |
|
"logits": -0.7999446392059326, |
|
"logps": -76.43958282470703, |
|
"loss": 0.1396, |
|
"objective": 0.14750143885612488, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.14750143885612488, |
|
"step": 375 |
|
}, |
|
{ |
|
"dpo_loss": 0.5414277911186218, |
|
"epoch": 2.153991497401984, |
|
"grad_norm": 7.256488804126431, |
|
"learning_rate": 3.5023263385165346e-06, |
|
"logits": -0.7813270092010498, |
|
"logps": -75.50519561767578, |
|
"loss": 0.1433, |
|
"objective": 0.14621533453464508, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.14621533453464508, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_loss": 0.5473071932792664, |
|
"epoch": 2.182333490788852, |
|
"grad_norm": 7.1971925561639445, |
|
"learning_rate": 3.4567085809127247e-06, |
|
"logits": -0.8520491123199463, |
|
"logps": -77.47991180419922, |
|
"loss": 0.1389, |
|
"objective": 0.13993406295776367, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.13993406295776367, |
|
"step": 385 |
|
}, |
|
{ |
|
"dpo_loss": 0.538723349571228, |
|
"epoch": 2.21067548417572, |
|
"grad_norm": 7.027817565044039, |
|
"learning_rate": 3.410714505454486e-06, |
|
"logits": -0.80887770652771, |
|
"logps": -76.32317352294922, |
|
"loss": 0.1308, |
|
"objective": 0.12663429975509644, |
|
"ranking_idealized": 0.6041666865348816, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.12663428485393524, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_loss": 0.5224890112876892, |
|
"epoch": 2.2390174775625886, |
|
"grad_norm": 6.949068306199585, |
|
"learning_rate": 3.364362203744777e-06, |
|
"logits": -0.8549118638038635, |
|
"logps": -76.35110473632812, |
|
"loss": 0.1437, |
|
"objective": 0.14411191642284393, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.14411191642284393, |
|
"step": 395 |
|
}, |
|
{ |
|
"dpo_loss": 0.5168216228485107, |
|
"epoch": 2.2673594709494567, |
|
"grad_norm": 7.191883909269667, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits": -0.9377852082252502, |
|
"logps": -77.43128204345703, |
|
"loss": 0.1368, |
|
"objective": 0.13501186668872833, |
|
"ranking_idealized": 0.7583333253860474, |
|
"ranking_idealized_expo": 0.5791666507720947, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.13501186668872833, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2673594709494567, |
|
"eval_dpo_loss": 0.6858804225921631, |
|
"eval_logits": -1.0155284404754639, |
|
"eval_logps": -80.20379638671875, |
|
"eval_loss": 0.4407689571380615, |
|
"eval_objective": 0.43782898783683777, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5320248007774353, |
|
"eval_regularize": 0.43782898783683777, |
|
"eval_runtime": 259.3137, |
|
"eval_samples_per_second": 22.328, |
|
"eval_steps_per_second": 0.933, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_loss": 0.537171483039856, |
|
"epoch": 2.295701464336325, |
|
"grad_norm": 7.007268345576302, |
|
"learning_rate": 3.2706559853460818e-06, |
|
"logits": -1.0055091381072998, |
|
"logps": -74.74109649658203, |
|
"loss": 0.1383, |
|
"objective": 0.1392410695552826, |
|
"ranking_idealized": 0.737500011920929, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.1392410695552826, |
|
"step": 405 |
|
}, |
|
{ |
|
"dpo_loss": 0.5326074361801147, |
|
"epoch": 2.324043457723193, |
|
"grad_norm": 6.941825657006022, |
|
"learning_rate": 3.2233389276586325e-06, |
|
"logits": -0.8970204591751099, |
|
"logps": -74.2923355102539, |
|
"loss": 0.1297, |
|
"objective": 0.12273009866476059, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.12273009866476059, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_loss": 0.5270788073539734, |
|
"epoch": 2.3523854511100613, |
|
"grad_norm": 7.366817713585052, |
|
"learning_rate": 3.1757373472244324e-06, |
|
"logits": -0.8971990942955017, |
|
"logps": -74.44580078125, |
|
"loss": 0.1319, |
|
"objective": 0.14558042585849762, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.14558042585849762, |
|
"step": 415 |
|
}, |
|
{ |
|
"dpo_loss": 0.5407485961914062, |
|
"epoch": 2.3807274444969297, |
|
"grad_norm": 7.016879855860433, |
|
"learning_rate": 3.127869967952698e-06, |
|
"logits": -0.8165015578269958, |
|
"logps": -75.92127227783203, |
|
"loss": 0.1297, |
|
"objective": 0.13260656595230103, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.13260656595230103, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_loss": 0.526520848274231, |
|
"epoch": 2.409069437883798, |
|
"grad_norm": 6.605258428992394, |
|
"learning_rate": 3.0797556183036582e-06, |
|
"logits": -0.8272897601127625, |
|
"logps": -74.90123748779297, |
|
"loss": 0.1262, |
|
"objective": 0.12788838148117065, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.12788838148117065, |
|
"step": 425 |
|
}, |
|
{ |
|
"dpo_loss": 0.5405741333961487, |
|
"epoch": 2.4374114312706663, |
|
"grad_norm": 6.6924189063588955, |
|
"learning_rate": 3.0314132238824416e-06, |
|
"logits": -0.8260743021965027, |
|
"logps": -75.06442260742188, |
|
"loss": 0.1266, |
|
"objective": 0.11811564862728119, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.11811564117670059, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_loss": 0.5238969922065735, |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 6.903355919273098, |
|
"learning_rate": 2.9828617999947647e-06, |
|
"logits": -0.8589097857475281, |
|
"logps": -75.69316101074219, |
|
"loss": 0.1249, |
|
"objective": 0.12512782216072083, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.12512782216072083, |
|
"step": 435 |
|
}, |
|
{ |
|
"dpo_loss": 0.5445213317871094, |
|
"epoch": 2.4940954180444024, |
|
"grad_norm": 6.961032298685898, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits": -0.7675038576126099, |
|
"logps": -74.55623626708984, |
|
"loss": 0.126, |
|
"objective": 0.12659841775894165, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.12659841775894165, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_loss": 0.5370126366615295, |
|
"epoch": 2.5224374114312704, |
|
"grad_norm": 6.94281741073779, |
|
"learning_rate": 2.8852083286358647e-06, |
|
"logits": -0.7942711710929871, |
|
"logps": -71.90986633300781, |
|
"loss": 0.1211, |
|
"objective": 0.13307242095470428, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.13307242095470428, |
|
"step": 445 |
|
}, |
|
{ |
|
"dpo_loss": 0.5622422099113464, |
|
"epoch": 2.550779404818139, |
|
"grad_norm": 7.14501016601336, |
|
"learning_rate": 2.8361446928038298e-06, |
|
"logits": -0.8527530431747437, |
|
"logps": -74.12069702148438, |
|
"loss": 0.122, |
|
"objective": 0.13083526492118835, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.612500011920929, |
|
"regularize": 0.13083526492118835, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.550779404818139, |
|
"eval_dpo_loss": 0.6862838268280029, |
|
"eval_logits": -0.8946070671081543, |
|
"eval_logps": -78.42880249023438, |
|
"eval_loss": 0.44150272011756897, |
|
"eval_objective": 0.44035181403160095, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5258264541625977, |
|
"eval_regularize": 0.44035181403160095, |
|
"eval_runtime": 258.0077, |
|
"eval_samples_per_second": 22.441, |
|
"eval_steps_per_second": 0.938, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_loss": 0.5327574610710144, |
|
"epoch": 2.579121398205007, |
|
"grad_norm": 7.160178764520009, |
|
"learning_rate": 2.7869488356746344e-06, |
|
"logits": -0.8465222716331482, |
|
"logps": -74.07278442382812, |
|
"loss": 0.1216, |
|
"objective": 0.1370854526758194, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.1370854526758194, |
|
"step": 455 |
|
}, |
|
{ |
|
"dpo_loss": 0.5250583291053772, |
|
"epoch": 2.6074633915918755, |
|
"grad_norm": 7.146508258603712, |
|
"learning_rate": 2.7376401082604563e-06, |
|
"logits": -0.8942106366157532, |
|
"logps": -74.29560852050781, |
|
"loss": 0.1165, |
|
"objective": 0.12135622650384903, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.12135622650384903, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_loss": 0.530348539352417, |
|
"epoch": 2.6358053849787435, |
|
"grad_norm": 7.184783585057386, |
|
"learning_rate": 2.6882379059705953e-06, |
|
"logits": -0.7349064350128174, |
|
"logps": -73.84181213378906, |
|
"loss": 0.1204, |
|
"objective": 0.1292448341846466, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.1292448341846466, |
|
"step": 465 |
|
}, |
|
{ |
|
"dpo_loss": 0.5289201140403748, |
|
"epoch": 2.6641473783656116, |
|
"grad_norm": 7.1334661921422216, |
|
"learning_rate": 2.6387616609823506e-06, |
|
"logits": -0.7510494589805603, |
|
"logps": -73.42973327636719, |
|
"loss": 0.1121, |
|
"objective": 0.10491514950990677, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.10491514950990677, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_loss": 0.5439261794090271, |
|
"epoch": 2.69248937175248, |
|
"grad_norm": 7.044559545822627, |
|
"learning_rate": 2.5892308345974517e-06, |
|
"logits": -0.7502660155296326, |
|
"logps": -73.04237365722656, |
|
"loss": 0.1114, |
|
"objective": 0.11930320411920547, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.11930320411920547, |
|
"step": 475 |
|
}, |
|
{ |
|
"dpo_loss": 0.528633177280426, |
|
"epoch": 2.720831365139348, |
|
"grad_norm": 7.216679182755541, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits": -0.832193911075592, |
|
"logps": -73.19383239746094, |
|
"loss": 0.1104, |
|
"objective": 0.10962951928377151, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.10962951928377151, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_loss": 0.5266162753105164, |
|
"epoch": 2.7491733585262166, |
|
"grad_norm": 6.942552990674209, |
|
"learning_rate": 2.490083382528097e-06, |
|
"logits": -0.780593752861023, |
|
"logps": -75.74951171875, |
|
"loss": 0.1126, |
|
"objective": 0.10767225921154022, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.10767225176095963, |
|
"step": 485 |
|
}, |
|
{ |
|
"dpo_loss": 0.5204401016235352, |
|
"epoch": 2.7775153519130846, |
|
"grad_norm": 6.79511973951677, |
|
"learning_rate": 2.440505756134732e-06, |
|
"logits": -0.7421233654022217, |
|
"logps": -74.27189636230469, |
|
"loss": 0.1117, |
|
"objective": 0.11147340387105942, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.4749999940395355, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.11147340387105942, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_loss": 0.5330770015716553, |
|
"epoch": 2.8058573452999527, |
|
"grad_norm": 7.282976503781254, |
|
"learning_rate": 2.3909515315866606e-06, |
|
"logits": -0.7717820405960083, |
|
"logps": -72.36864471435547, |
|
"loss": 0.1034, |
|
"objective": 0.10382074862718582, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4833333194255829, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.10382074862718582, |
|
"step": 495 |
|
}, |
|
{ |
|
"dpo_loss": 0.5220891833305359, |
|
"epoch": 2.8341993386868207, |
|
"grad_norm": 7.004605377489112, |
|
"learning_rate": 2.341440200858589e-06, |
|
"logits": -0.822429895401001, |
|
"logps": -71.28691864013672, |
|
"loss": 0.1063, |
|
"objective": 0.11509209126234055, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.11509209126234055, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8341993386868207, |
|
"eval_dpo_loss": 0.6860550045967102, |
|
"eval_logits": -0.8682713508605957, |
|
"eval_logps": -78.1278076171875, |
|
"eval_loss": 0.44106075167655945, |
|
"eval_objective": 0.43840065598487854, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5299586653709412, |
|
"eval_regularize": 0.43840065598487854, |
|
"eval_runtime": 259.0969, |
|
"eval_samples_per_second": 22.347, |
|
"eval_steps_per_second": 0.934, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_loss": 0.5273423790931702, |
|
"epoch": 2.862541332073689, |
|
"grad_norm": 6.758921464396605, |
|
"learning_rate": 2.2919912390530945e-06, |
|
"logits": -0.7519776225090027, |
|
"logps": -72.76760864257812, |
|
"loss": 0.1035, |
|
"objective": 0.10023737698793411, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.10023736953735352, |
|
"step": 505 |
|
}, |
|
{ |
|
"dpo_loss": 0.5383204817771912, |
|
"epoch": 2.8908833254605573, |
|
"grad_norm": 6.749917335321248, |
|
"learning_rate": 2.242624096740164e-06, |
|
"logits": -0.7136736512184143, |
|
"logps": -73.48322296142578, |
|
"loss": 0.1008, |
|
"objective": 0.10292276740074158, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.10292276740074158, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_loss": 0.5426943302154541, |
|
"epoch": 2.9192253188474258, |
|
"grad_norm": 6.80108683493094, |
|
"learning_rate": 2.193358192306384e-06, |
|
"logits": -0.844546914100647, |
|
"logps": -72.76075744628906, |
|
"loss": 0.1039, |
|
"objective": 0.10740550607442856, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.10740550607442856, |
|
"step": 515 |
|
}, |
|
{ |
|
"dpo_loss": 0.5363429188728333, |
|
"epoch": 2.947567312234294, |
|
"grad_norm": 7.22597227685905, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits": -0.7738971710205078, |
|
"logps": -74.9779281616211, |
|
"loss": 0.102, |
|
"objective": 0.11110852658748627, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.11110852658748627, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_loss": 0.5326921343803406, |
|
"epoch": 2.975909305621162, |
|
"grad_norm": 7.020529204895981, |
|
"learning_rate": 2.0952075638923656e-06, |
|
"logits": -0.773563027381897, |
|
"logps": -73.9379653930664, |
|
"loss": 0.0967, |
|
"objective": 0.1049317866563797, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5708333253860474, |
|
"ranking_simple": 0.699999988079071, |
|
"regularize": 0.1049317866563797, |
|
"step": 525 |
|
}, |
|
{ |
|
"dpo_loss": 0.5151563286781311, |
|
"epoch": 3.0042512990080303, |
|
"grad_norm": 6.663633146942429, |
|
"learning_rate": 2.046361447106244e-06, |
|
"logits": -0.7806794047355652, |
|
"logps": -73.31244659423828, |
|
"loss": 0.0987, |
|
"objective": 0.10096151381731033, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.10096149891614914, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_loss": 0.5298264622688293, |
|
"epoch": 3.0325932923948984, |
|
"grad_norm": 6.838429513652437, |
|
"learning_rate": 1.997693767401503e-06, |
|
"logits": -0.8067893981933594, |
|
"logps": -75.07320404052734, |
|
"loss": 0.0914, |
|
"objective": 0.09071025252342224, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.09071025252342224, |
|
"step": 535 |
|
}, |
|
{ |
|
"dpo_loss": 0.5308666825294495, |
|
"epoch": 3.0609352857817664, |
|
"grad_norm": 6.786419151482898, |
|
"learning_rate": 1.9492236680336486e-06, |
|
"logits": -0.8495451807975769, |
|
"logps": -72.48417663574219, |
|
"loss": 0.0869, |
|
"objective": 0.08104575425386429, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.0810457393527031, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_loss": 0.5390760898590088, |
|
"epoch": 3.089277279168635, |
|
"grad_norm": 6.715619451066239, |
|
"learning_rate": 1.9009702145406728e-06, |
|
"logits": -0.7783963084220886, |
|
"logps": -73.88996887207031, |
|
"loss": 0.0873, |
|
"objective": 0.08420184254646301, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.49166667461395264, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.08420184254646301, |
|
"step": 545 |
|
}, |
|
{ |
|
"dpo_loss": 0.5265496373176575, |
|
"epoch": 3.117619272555503, |
|
"grad_norm": 6.799228974819442, |
|
"learning_rate": 1.852952387243698e-06, |
|
"logits": -0.6418666243553162, |
|
"logps": -73.9046401977539, |
|
"loss": 0.0878, |
|
"objective": 0.09073540568351746, |
|
"ranking_idealized": 0.699999988079071, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.7041666507720947, |
|
"regularize": 0.09073540568351746, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.117619272555503, |
|
"eval_dpo_loss": 0.6847736239433289, |
|
"eval_logits": -0.8292139172554016, |
|
"eval_logps": -77.63910675048828, |
|
"eval_loss": 0.44063544273376465, |
|
"eval_objective": 0.43784743547439575, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5330578684806824, |
|
"eval_regularize": 0.43784743547439575, |
|
"eval_runtime": 259.006, |
|
"eval_samples_per_second": 22.355, |
|
"eval_steps_per_second": 0.934, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_loss": 0.5151117444038391, |
|
"epoch": 3.1459612659423715, |
|
"grad_norm": 7.0486870886771955, |
|
"learning_rate": 1.8051890737811395e-06, |
|
"logits": -0.6810140013694763, |
|
"logps": -74.01383972167969, |
|
"loss": 0.0855, |
|
"objective": 0.08012814819812775, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.08012814074754715, |
|
"step": 555 |
|
}, |
|
{ |
|
"dpo_loss": 0.5303942561149597, |
|
"epoch": 3.1743032593292395, |
|
"grad_norm": 6.686572075133422, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits": -0.7435484528541565, |
|
"logps": -70.75598907470703, |
|
"loss": 0.0855, |
|
"objective": 0.07928713411092758, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.07928712666034698, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_loss": 0.5257388353347778, |
|
"epoch": 3.2026452527161076, |
|
"grad_norm": 7.035317567433199, |
|
"learning_rate": 1.7105010309624381e-06, |
|
"logits": -0.7857434153556824, |
|
"logps": -71.99687194824219, |
|
"loss": 0.0807, |
|
"objective": 0.07652737945318222, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.07652737945318222, |
|
"step": 565 |
|
}, |
|
{ |
|
"dpo_loss": 0.5218066573143005, |
|
"epoch": 3.230987246102976, |
|
"grad_norm": 6.567722467858525, |
|
"learning_rate": 1.6636135468049122e-06, |
|
"logits": -0.7239015698432922, |
|
"logps": -72.04088592529297, |
|
"loss": 0.0814, |
|
"objective": 0.07984793186187744, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.6291666626930237, |
|
"regularize": 0.07984793186187744, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_loss": 0.5289373397827148, |
|
"epoch": 3.259329239489844, |
|
"grad_norm": 6.699684239267555, |
|
"learning_rate": 1.617055052228768e-06, |
|
"logits": -0.7765447497367859, |
|
"logps": -72.74471282958984, |
|
"loss": 0.0823, |
|
"objective": 0.08229862153530121, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.08229862153530121, |
|
"step": 575 |
|
}, |
|
{ |
|
"dpo_loss": 0.5433183908462524, |
|
"epoch": 3.287671232876712, |
|
"grad_norm": 6.701807007044451, |
|
"learning_rate": 1.5708438608491816e-06, |
|
"logits": -0.7891409397125244, |
|
"logps": -73.1654052734375, |
|
"loss": 0.0794, |
|
"objective": 0.07175194472074509, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.0717519223690033, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_loss": 0.5400884747505188, |
|
"epoch": 3.3160132262635806, |
|
"grad_norm": 7.671988032753608, |
|
"learning_rate": 1.524998149670871e-06, |
|
"logits": -0.817208468914032, |
|
"logps": -74.3894271850586, |
|
"loss": 0.0807, |
|
"objective": 0.0763852447271347, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.0763852447271347, |
|
"step": 585 |
|
}, |
|
{ |
|
"dpo_loss": 0.539016604423523, |
|
"epoch": 3.3443552196504487, |
|
"grad_norm": 7.045114972527912, |
|
"learning_rate": 1.479535951938243e-06, |
|
"logits": -0.8479073643684387, |
|
"logps": -74.54483795166016, |
|
"loss": 0.0782, |
|
"objective": 0.07480078190565109, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.07480078190565109, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_loss": 0.5274596810340881, |
|
"epoch": 3.372697213037317, |
|
"grad_norm": 6.813876249894319, |
|
"learning_rate": 1.43447515004208e-06, |
|
"logits": -0.7586421966552734, |
|
"logps": -73.8796615600586, |
|
"loss": 0.0725, |
|
"objective": 0.06939196586608887, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.06939195841550827, |
|
"step": 595 |
|
}, |
|
{ |
|
"dpo_loss": 0.507723331451416, |
|
"epoch": 3.4010392064241852, |
|
"grad_norm": 6.843883025807739, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits": -0.7954932451248169, |
|
"logps": -73.50051879882812, |
|
"loss": 0.0719, |
|
"objective": 0.0723666176199913, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.0723666176199913, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.4010392064241852, |
|
"eval_dpo_loss": 0.6851052045822144, |
|
"eval_logits": -0.8875375390052795, |
|
"eval_logps": -77.49230194091797, |
|
"eval_loss": 0.439556360244751, |
|
"eval_objective": 0.43727535009384155, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5309917330741882, |
|
"eval_regularize": 0.43727535009384155, |
|
"eval_runtime": 260.0756, |
|
"eval_samples_per_second": 22.263, |
|
"eval_steps_per_second": 0.93, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_loss": 0.539269745349884, |
|
"epoch": 3.4293811998110533, |
|
"grad_norm": 6.836932990419662, |
|
"learning_rate": 1.3456284669124159e-06, |
|
"logits": -0.8135491013526917, |
|
"logps": -75.25065612792969, |
|
"loss": 0.072, |
|
"objective": 0.06646443903446198, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.06646443903446198, |
|
"step": 605 |
|
}, |
|
{ |
|
"dpo_loss": 0.532633364200592, |
|
"epoch": 3.4577231931979218, |
|
"grad_norm": 6.918313902636074, |
|
"learning_rate": 1.301877533199859e-06, |
|
"logits": -0.812556266784668, |
|
"logps": -73.47212219238281, |
|
"loss": 0.0726, |
|
"objective": 0.06124640628695488, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.06124639883637428, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_loss": 0.522627055644989, |
|
"epoch": 3.48606518658479, |
|
"grad_norm": 6.525966688786376, |
|
"learning_rate": 1.2585978766191726e-06, |
|
"logits": -0.8209077715873718, |
|
"logps": -74.44559478759766, |
|
"loss": 0.0694, |
|
"objective": 0.0690179094672203, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.47083333134651184, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.0690179094672203, |
|
"step": 615 |
|
}, |
|
{ |
|
"dpo_loss": 0.5149637460708618, |
|
"epoch": 3.514407179971658, |
|
"grad_norm": 6.444084091552882, |
|
"learning_rate": 1.2158065210664848e-06, |
|
"logits": -0.734274685382843, |
|
"logps": -73.22693634033203, |
|
"loss": 0.0739, |
|
"objective": 0.06696704030036926, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6333333253860474, |
|
"regularize": 0.06696703284978867, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_loss": 0.5303381085395813, |
|
"epoch": 3.5427491733585263, |
|
"grad_norm": 7.253457560302881, |
|
"learning_rate": 1.1735202983664803e-06, |
|
"logits": -0.763002336025238, |
|
"logps": -71.75556182861328, |
|
"loss": 0.0684, |
|
"objective": 0.06924500316381454, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.06924500316381454, |
|
"step": 625 |
|
}, |
|
{ |
|
"dpo_loss": 0.5344926118850708, |
|
"epoch": 3.5710911667453944, |
|
"grad_norm": 7.160250572063496, |
|
"learning_rate": 1.1317558416516696e-06, |
|
"logits": -0.8248269557952881, |
|
"logps": -72.7044677734375, |
|
"loss": 0.0685, |
|
"objective": 0.06329541653394699, |
|
"ranking_idealized": 0.6333333253860474, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.0632954090833664, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_loss": 0.5331679582595825, |
|
"epoch": 3.5994331601322624, |
|
"grad_norm": 6.5609855806488575, |
|
"learning_rate": 1.0905295788197993e-06, |
|
"logits": -0.7775312066078186, |
|
"logps": -72.57146453857422, |
|
"loss": 0.0645, |
|
"objective": 0.06953860074281693, |
|
"ranking_idealized": 0.637499988079071, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6208333373069763, |
|
"regularize": 0.06953860074281693, |
|
"step": 635 |
|
}, |
|
{ |
|
"dpo_loss": 0.5081437826156616, |
|
"epoch": 3.627775153519131, |
|
"grad_norm": 6.931309552817511, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits": -0.7663463354110718, |
|
"logps": -74.27519989013672, |
|
"loss": 0.0641, |
|
"objective": 0.06706919521093369, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.6458333134651184, |
|
"regularize": 0.06706918776035309, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_loss": 0.5230153799057007, |
|
"epoch": 3.656117146905999, |
|
"grad_norm": 6.607222677286154, |
|
"learning_rate": 1.0097562815342215e-06, |
|
"logits": -0.7920152544975281, |
|
"logps": -71.90898132324219, |
|
"loss": 0.0644, |
|
"objective": 0.06532428413629532, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5249999761581421, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.06532428413629532, |
|
"step": 645 |
|
}, |
|
{ |
|
"dpo_loss": 0.5418646335601807, |
|
"epoch": 3.6844591402928675, |
|
"grad_norm": 6.694188599643782, |
|
"learning_rate": 9.702410189643838e-07, |
|
"logits": -0.8044089674949646, |
|
"logps": -72.86306762695312, |
|
"loss": 0.0618, |
|
"objective": 0.05348058044910431, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.05348057672381401, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6844591402928675, |
|
"eval_dpo_loss": 0.6855461001396179, |
|
"eval_logits": -0.9103026986122131, |
|
"eval_logps": -77.18383026123047, |
|
"eval_loss": 0.4394647479057312, |
|
"eval_objective": 0.43862253427505493, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5268595218658447, |
|
"eval_regularize": 0.43862253427505493, |
|
"eval_runtime": 259.3939, |
|
"eval_samples_per_second": 22.321, |
|
"eval_steps_per_second": 0.933, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_loss": 0.5326651930809021, |
|
"epoch": 3.7128011336797355, |
|
"grad_norm": 6.747215671656663, |
|
"learning_rate": 9.313274815478698e-07, |
|
"logits": -0.8151116967201233, |
|
"logps": -72.32289123535156, |
|
"loss": 0.0682, |
|
"objective": 0.06774523854255676, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.06774523109197617, |
|
"step": 655 |
|
}, |
|
{ |
|
"dpo_loss": 0.5296677947044373, |
|
"epoch": 3.7411431270666036, |
|
"grad_norm": 6.603209874734441, |
|
"learning_rate": 8.930309757836517e-07, |
|
"logits": -0.8544061779975891, |
|
"logps": -72.76691436767578, |
|
"loss": 0.0605, |
|
"objective": 0.06562329083681107, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.06562329083681107, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_loss": 0.5254350900650024, |
|
"epoch": 3.769485120453472, |
|
"grad_norm": 6.7420568501361, |
|
"learning_rate": 8.553665654635343e-07, |
|
"logits": -0.7711302042007446, |
|
"logps": -72.90251159667969, |
|
"loss": 0.0589, |
|
"objective": 0.05725221708416939, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.057252202183008194, |
|
"step": 665 |
|
}, |
|
{ |
|
"dpo_loss": 0.529458224773407, |
|
"epoch": 3.79782711384034, |
|
"grad_norm": 6.596226604829734, |
|
"learning_rate": 8.183490657468687e-07, |
|
"logits": -0.874411940574646, |
|
"logps": -73.00768280029297, |
|
"loss": 0.0592, |
|
"objective": 0.0637064203619957, |
|
"ranking_idealized": 0.7333333492279053, |
|
"ranking_idealized_expo": 0.5958333611488342, |
|
"ranking_simple": 0.7333333492279053, |
|
"regularize": 0.0637064203619957, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_loss": 0.5332936644554138, |
|
"epoch": 3.826169107227208, |
|
"grad_norm": 6.620708805759934, |
|
"learning_rate": 7.819930373330669e-07, |
|
"logits": -0.8352341055870056, |
|
"logps": -72.00785827636719, |
|
"loss": 0.0556, |
|
"objective": 0.05346338450908661, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.05346338450908661, |
|
"step": 675 |
|
}, |
|
{ |
|
"dpo_loss": 0.5433677434921265, |
|
"epoch": 3.8545111006140766, |
|
"grad_norm": 6.502071174130889, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits": -0.7840080261230469, |
|
"logps": -72.76580047607422, |
|
"loss": 0.0571, |
|
"objective": 0.05716581270098686, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.057165808975696564, |
|
"step": 680 |
|
}, |
|
{ |
|
"dpo_loss": 0.5243024230003357, |
|
"epoch": 3.8828530940009447, |
|
"grad_norm": 6.827039590730102, |
|
"learning_rate": 7.113223306499336e-07, |
|
"logits": -0.8465909361839294, |
|
"logps": -72.07350158691406, |
|
"loss": 0.0572, |
|
"objective": 0.053640857338905334, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.05364084988832474, |
|
"step": 685 |
|
}, |
|
{ |
|
"dpo_loss": 0.5204752087593079, |
|
"epoch": 3.9111950873878127, |
|
"grad_norm": 6.658562071017675, |
|
"learning_rate": 6.770354504470575e-07, |
|
"logits": -0.8739193677902222, |
|
"logps": -71.3963394165039, |
|
"loss": 0.0561, |
|
"objective": 0.054595671594142914, |
|
"ranking_idealized": 0.7041666507720947, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.05459566414356232, |
|
"step": 690 |
|
}, |
|
{ |
|
"dpo_loss": 0.5284795761108398, |
|
"epoch": 3.9395370807746812, |
|
"grad_norm": 6.480093534880764, |
|
"learning_rate": 6.434656267456843e-07, |
|
"logits": -0.8127073049545288, |
|
"logps": -72.72264862060547, |
|
"loss": 0.0567, |
|
"objective": 0.06189308688044548, |
|
"ranking_idealized": 0.6083333492279053, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.6083333492279053, |
|
"regularize": 0.06189308688044548, |
|
"step": 695 |
|
}, |
|
{ |
|
"dpo_loss": 0.5350829362869263, |
|
"epoch": 3.9678790741615493, |
|
"grad_norm": 6.7060271282278245, |
|
"learning_rate": 6.106260641143547e-07, |
|
"logits": -0.833633303642273, |
|
"logps": -73.22267150878906, |
|
"loss": 0.0551, |
|
"objective": 0.05491795390844345, |
|
"ranking_idealized": 0.612500011920929, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6166666746139526, |
|
"regularize": 0.05491795018315315, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9678790741615493, |
|
"eval_dpo_loss": 0.6859015226364136, |
|
"eval_logits": -0.9137452244758606, |
|
"eval_logps": -77.72093200683594, |
|
"eval_loss": 0.440186470746994, |
|
"eval_objective": 0.43875548243522644, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5289255976676941, |
|
"eval_regularize": 0.43875548243522644, |
|
"eval_runtime": 259.3718, |
|
"eval_samples_per_second": 22.323, |
|
"eval_steps_per_second": 0.933, |
|
"step": 700 |
|
}, |
|
{ |
|
"dpo_loss": 0.5229139924049377, |
|
"epoch": 3.9962210675484178, |
|
"grad_norm": 6.704821856039977, |
|
"learning_rate": 5.785296798760601e-07, |
|
"logits": -0.7782571911811829, |
|
"logps": -72.21749877929688, |
|
"loss": 0.0552, |
|
"objective": 0.048626501113176346, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.04862649738788605, |
|
"step": 705 |
|
}, |
|
{ |
|
"dpo_loss": 0.5162150263786316, |
|
"epoch": 4.024563060935286, |
|
"grad_norm": 7.292565223355315, |
|
"learning_rate": 5.471890990272666e-07, |
|
"logits": -0.814688503742218, |
|
"logps": -73.78315734863281, |
|
"loss": 0.0446, |
|
"objective": 0.045689478516578674, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.4791666567325592, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.04568947106599808, |
|
"step": 710 |
|
}, |
|
{ |
|
"dpo_loss": 0.5273666977882385, |
|
"epoch": 4.052905054322154, |
|
"grad_norm": 6.71901500064993, |
|
"learning_rate": 5.166166492719124e-07, |
|
"logits": -0.7857570648193359, |
|
"logps": -72.7701187133789, |
|
"loss": 0.0422, |
|
"objective": 0.04041726142168045, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5416666865348816, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.04041723534464836, |
|
"step": 715 |
|
}, |
|
{ |
|
"dpo_loss": 0.5177367329597473, |
|
"epoch": 4.081247047709022, |
|
"grad_norm": 6.659794774093643, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits": -0.7260258793830872, |
|
"logps": -75.34712219238281, |
|
"loss": 0.0448, |
|
"objective": 0.041196659207344055, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.04119665548205376, |
|
"step": 720 |
|
}, |
|
{ |
|
"dpo_loss": 0.5408446192741394, |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 6.565784729673605, |
|
"learning_rate": 4.57823938419153e-07, |
|
"logits": -0.786972165107727, |
|
"logps": -72.7632064819336, |
|
"loss": 0.0445, |
|
"objective": 0.04578384384512901, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.045783836394548416, |
|
"step": 725 |
|
}, |
|
{ |
|
"dpo_loss": 0.5255261659622192, |
|
"epoch": 4.137931034482759, |
|
"grad_norm": 6.9190144367452975, |
|
"learning_rate": 4.2962680322157335e-07, |
|
"logits": -0.8668403625488281, |
|
"logps": -72.80368041992188, |
|
"loss": 0.0469, |
|
"objective": 0.0492975153028965, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.04929749667644501, |
|
"step": 730 |
|
}, |
|
{ |
|
"dpo_loss": 0.5357745885848999, |
|
"epoch": 4.166273027869627, |
|
"grad_norm": 6.713218850119101, |
|
"learning_rate": 4.0224404182059443e-07, |
|
"logits": -0.7645056843757629, |
|
"logps": -74.07119750976562, |
|
"loss": 0.04, |
|
"objective": 0.04291637986898422, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6791666746139526, |
|
"regularize": 0.042916372418403625, |
|
"step": 735 |
|
}, |
|
{ |
|
"dpo_loss": 0.5108519196510315, |
|
"epoch": 4.194615021256495, |
|
"grad_norm": 6.802036362247038, |
|
"learning_rate": 3.756864251262143e-07, |
|
"logits": -0.8028141260147095, |
|
"logps": -73.28968048095703, |
|
"loss": 0.0412, |
|
"objective": 0.044031720608472824, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5208333134651184, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.04403171315789223, |
|
"step": 740 |
|
}, |
|
{ |
|
"dpo_loss": 0.5228927731513977, |
|
"epoch": 4.222957014643363, |
|
"grad_norm": 6.653699462048885, |
|
"learning_rate": 3.499643994807486e-07, |
|
"logits": -0.8697967529296875, |
|
"logps": -70.57527160644531, |
|
"loss": 0.0428, |
|
"objective": 0.04283791035413742, |
|
"ranking_idealized": 0.6291666626930237, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.042837902903556824, |
|
"step": 745 |
|
}, |
|
{ |
|
"dpo_loss": 0.5164041519165039, |
|
"epoch": 4.251299008030231, |
|
"grad_norm": 6.911061908397223, |
|
"learning_rate": 3.250880825498026e-07, |
|
"logits": -0.9160488843917847, |
|
"logps": -72.56361389160156, |
|
"loss": 0.0388, |
|
"objective": 0.03974687308073044, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.03974686935544014, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.251299008030231, |
|
"eval_dpo_loss": 0.685931921005249, |
|
"eval_logits": -0.8975909352302551, |
|
"eval_logps": -77.07003021240234, |
|
"eval_loss": 0.4403546452522278, |
|
"eval_objective": 0.438612699508667, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5309917330741882, |
|
"eval_regularize": 0.438612699508667, |
|
"eval_runtime": 259.3145, |
|
"eval_samples_per_second": 22.328, |
|
"eval_steps_per_second": 0.933, |
|
"step": 750 |
|
}, |
|
{ |
|
"dpo_loss": 0.5212615132331848, |
|
"epoch": 4.2796410014171, |
|
"grad_norm": 6.600495039472016, |
|
"learning_rate": 3.0106725934252095e-07, |
|
"logits": -0.8632883429527283, |
|
"logps": -71.9403076171875, |
|
"loss": 0.0401, |
|
"objective": 0.039546623826026917, |
|
"ranking_idealized": 0.6791666746139526, |
|
"ranking_idealized_expo": 0.5333333611488342, |
|
"ranking_simple": 0.6708333492279053, |
|
"regularize": 0.03954662010073662, |
|
"step": 755 |
|
}, |
|
{ |
|
"dpo_loss": 0.5251290798187256, |
|
"epoch": 4.307982994803968, |
|
"grad_norm": 6.496157144727595, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits": -0.8375190496444702, |
|
"logps": -73.24321746826172, |
|
"loss": 0.0397, |
|
"objective": 0.03997815027832985, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5666666626930237, |
|
"ranking_simple": 0.6958333253860474, |
|
"regularize": 0.039978139102458954, |
|
"step": 760 |
|
}, |
|
{ |
|
"dpo_loss": 0.5071607828140259, |
|
"epoch": 4.336324988190836, |
|
"grad_norm": 6.910390218786043, |
|
"learning_rate": 2.5562954789221164e-07, |
|
"logits": -0.8520547747612, |
|
"logps": -73.03939056396484, |
|
"loss": 0.0405, |
|
"objective": 0.036770131438970566, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.7291666865348816, |
|
"regularize": 0.03677012771368027, |
|
"step": 765 |
|
}, |
|
{ |
|
"dpo_loss": 0.509007453918457, |
|
"epoch": 4.364666981577704, |
|
"grad_norm": 6.462624726733492, |
|
"learning_rate": 2.3423053240837518e-07, |
|
"logits": -0.7884809970855713, |
|
"logps": -71.66094207763672, |
|
"loss": 0.0396, |
|
"objective": 0.03581638261675835, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.5083333253860474, |
|
"ranking_simple": 0.6666666865348816, |
|
"regularize": 0.03581637516617775, |
|
"step": 770 |
|
}, |
|
{ |
|
"dpo_loss": 0.5204900503158569, |
|
"epoch": 4.393008974964572, |
|
"grad_norm": 6.9115021717677445, |
|
"learning_rate": 2.137227491364016e-07, |
|
"logits": -0.8254011869430542, |
|
"logps": -72.81352233886719, |
|
"loss": 0.0382, |
|
"objective": 0.03518719598650932, |
|
"ranking_idealized": 0.6666666865348816, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.03518717736005783, |
|
"step": 775 |
|
}, |
|
{ |
|
"dpo_loss": 0.5285161733627319, |
|
"epoch": 4.42135096835144, |
|
"grad_norm": 6.553973237097607, |
|
"learning_rate": 1.941142647385469e-07, |
|
"logits": -0.843854546546936, |
|
"logps": -71.42961120605469, |
|
"loss": 0.0393, |
|
"objective": 0.03610792011022568, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.5041666626930237, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.03610791638493538, |
|
"step": 780 |
|
}, |
|
{ |
|
"dpo_loss": 0.5424137115478516, |
|
"epoch": 4.449692961738309, |
|
"grad_norm": 6.303662663640985, |
|
"learning_rate": 1.7541279214111277e-07, |
|
"logits": -0.8690065741539001, |
|
"logps": -71.59990692138672, |
|
"loss": 0.0409, |
|
"objective": 0.0452335849404335, |
|
"ranking_idealized": 0.6583333611488342, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.04523357003927231, |
|
"step": 785 |
|
}, |
|
{ |
|
"dpo_loss": 0.5163858532905579, |
|
"epoch": 4.478034955125177, |
|
"grad_norm": 6.732149069961477, |
|
"learning_rate": 1.5762568750059604e-07, |
|
"logits": -0.8400804400444031, |
|
"logps": -73.97518920898438, |
|
"loss": 0.0369, |
|
"objective": 0.042594779282808304, |
|
"ranking_idealized": 0.7291666865348816, |
|
"ranking_idealized_expo": 0.5583333373069763, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.04259476438164711, |
|
"step": 790 |
|
}, |
|
{ |
|
"dpo_loss": 0.523327112197876, |
|
"epoch": 4.506376948512045, |
|
"grad_norm": 6.468243868538749, |
|
"learning_rate": 1.4075994731016895e-07, |
|
"logits": -0.7410406470298767, |
|
"logps": -74.17599487304688, |
|
"loss": 0.037, |
|
"objective": 0.03256476670503616, |
|
"ranking_idealized": 0.6541666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.6499999761581421, |
|
"regularize": 0.03256473317742348, |
|
"step": 795 |
|
}, |
|
{ |
|
"dpo_loss": 0.5217226147651672, |
|
"epoch": 4.534718941898913, |
|
"grad_norm": 6.810051199376682, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits": -0.7353635430335999, |
|
"logps": -72.93279266357422, |
|
"loss": 0.0382, |
|
"objective": 0.03855961933732033, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6916666626930237, |
|
"regularize": 0.03855961188673973, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.534718941898913, |
|
"eval_dpo_loss": 0.6858941912651062, |
|
"eval_logits": -0.8972411155700684, |
|
"eval_logps": -77.24726867675781, |
|
"eval_loss": 0.4401608407497406, |
|
"eval_objective": 0.4384419023990631, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5320248007774353, |
|
"eval_regularize": 0.4384419023990631, |
|
"eval_runtime": 259.7863, |
|
"eval_samples_per_second": 22.288, |
|
"eval_steps_per_second": 0.932, |
|
"step": 800 |
|
}, |
|
{ |
|
"dpo_loss": 0.5223442316055298, |
|
"epoch": 4.563060935285781, |
|
"grad_norm": 6.589373819397174, |
|
"learning_rate": 1.0981873156594381e-07, |
|
"logits": -0.8119328022003174, |
|
"logps": -71.91402435302734, |
|
"loss": 0.0371, |
|
"objective": 0.03391700237989426, |
|
"ranking_idealized": 0.6458333134651184, |
|
"ranking_idealized_expo": 0.46666666865348816, |
|
"ranking_simple": 0.6541666388511658, |
|
"regularize": 0.03391699120402336, |
|
"step": 805 |
|
}, |
|
{ |
|
"dpo_loss": 0.5284490585327148, |
|
"epoch": 4.59140292867265, |
|
"grad_norm": 6.877467315737168, |
|
"learning_rate": 9.575542662726756e-08, |
|
"logits": -0.8636207580566406, |
|
"logps": -71.29000854492188, |
|
"loss": 0.0362, |
|
"objective": 0.034173477441072464, |
|
"ranking_idealized": 0.5874999761581421, |
|
"ranking_idealized_expo": 0.4625000059604645, |
|
"ranking_simple": 0.6000000238418579, |
|
"regularize": 0.034173473715782166, |
|
"step": 810 |
|
}, |
|
{ |
|
"dpo_loss": 0.532176673412323, |
|
"epoch": 4.619744922059518, |
|
"grad_norm": 6.410472181963987, |
|
"learning_rate": 8.26378225816582e-08, |
|
"logits": -0.7376688122749329, |
|
"logps": -72.42508697509766, |
|
"loss": 0.0338, |
|
"objective": 0.03282972797751427, |
|
"ranking_idealized": 0.7250000238418579, |
|
"ranking_idealized_expo": 0.5916666388511658, |
|
"ranking_simple": 0.737500011920929, |
|
"regularize": 0.03282969817519188, |
|
"step": 815 |
|
}, |
|
{ |
|
"dpo_loss": 0.5355073809623718, |
|
"epoch": 4.648086915446386, |
|
"grad_norm": 6.840519860011787, |
|
"learning_rate": 7.047107919114588e-08, |
|
"logits": -0.8241658806800842, |
|
"logps": -72.70903778076172, |
|
"loss": 0.0329, |
|
"objective": 0.029066000133752823, |
|
"ranking_idealized": 0.6875, |
|
"ranking_idealized_expo": 0.5625, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.029065988957881927, |
|
"step": 820 |
|
}, |
|
{ |
|
"dpo_loss": 0.5363853573799133, |
|
"epoch": 4.6764289088332545, |
|
"grad_norm": 6.5036481206185615, |
|
"learning_rate": 5.92599822001666e-08, |
|
"logits": -0.7593883872032166, |
|
"logps": -70.98949432373047, |
|
"loss": 0.0362, |
|
"objective": 0.03307075425982475, |
|
"ranking_idealized": 0.6166666746139526, |
|
"ranking_idealized_expo": 0.5, |
|
"ranking_simple": 0.6041666865348816, |
|
"regularize": 0.03307074308395386, |
|
"step": 825 |
|
}, |
|
{ |
|
"dpo_loss": 0.5210588574409485, |
|
"epoch": 4.7047709022201225, |
|
"grad_norm": 6.54657496412036, |
|
"learning_rate": 4.9008941453107527e-08, |
|
"logits": -0.882694661617279, |
|
"logps": -72.7275390625, |
|
"loss": 0.0382, |
|
"objective": 0.04189787432551384, |
|
"ranking_idealized": 0.675000011920929, |
|
"ranking_idealized_expo": 0.5166666507720947, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.04189785197377205, |
|
"step": 830 |
|
}, |
|
{ |
|
"dpo_loss": 0.5095342993736267, |
|
"epoch": 4.733112895606991, |
|
"grad_norm": 6.620950665910206, |
|
"learning_rate": 3.972198915970976e-08, |
|
"logits": -0.8053682446479797, |
|
"logps": -73.1785888671875, |
|
"loss": 0.0356, |
|
"objective": 0.03564568608999252, |
|
"ranking_idealized": 0.6499999761581421, |
|
"ranking_idealized_expo": 0.48750001192092896, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.03564564883708954, |
|
"step": 835 |
|
}, |
|
{ |
|
"dpo_loss": 0.5004899501800537, |
|
"epoch": 4.7614548889938595, |
|
"grad_norm": 6.845348700884689, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits": -0.8175690174102783, |
|
"logps": -73.01248168945312, |
|
"loss": 0.0343, |
|
"objective": 0.030798058956861496, |
|
"ranking_idealized": 0.6958333253860474, |
|
"ranking_idealized_expo": 0.5541666746139526, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.03079797886312008, |
|
"step": 840 |
|
}, |
|
{ |
|
"dpo_loss": 0.5335594415664673, |
|
"epoch": 4.7897968823807275, |
|
"grad_norm": 6.869470340595315, |
|
"learning_rate": 2.4054581232470785e-08, |
|
"logits": -0.823119044303894, |
|
"logps": -72.64087677001953, |
|
"loss": 0.0337, |
|
"objective": 0.03280922397971153, |
|
"ranking_idealized": 0.6416666507720947, |
|
"ranking_idealized_expo": 0.4583333432674408, |
|
"ranking_simple": 0.6416666507720947, |
|
"regularize": 0.032809216529130936, |
|
"step": 845 |
|
}, |
|
{ |
|
"dpo_loss": 0.5230380892753601, |
|
"epoch": 4.818138875767596, |
|
"grad_norm": 6.728592904487897, |
|
"learning_rate": 1.768028831677926e-08, |
|
"logits": -0.8133633732795715, |
|
"logps": -71.71713256835938, |
|
"loss": 0.032, |
|
"objective": 0.028577150776982307, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6833333373069763, |
|
"regularize": 0.028577139601111412, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.818138875767596, |
|
"eval_dpo_loss": 0.686019778251648, |
|
"eval_logits": -0.8982982635498047, |
|
"eval_logps": -77.20530700683594, |
|
"eval_loss": 0.440197616815567, |
|
"eval_objective": 0.4385392367839813, |
|
"eval_ranking_idealized": 0.6570248007774353, |
|
"eval_ranking_idealized_expo": 0.5113636255264282, |
|
"eval_ranking_simple": 0.5320248007774353, |
|
"eval_regularize": 0.4385392367839813, |
|
"eval_runtime": 259.2486, |
|
"eval_samples_per_second": 22.334, |
|
"eval_steps_per_second": 0.933, |
|
"step": 850 |
|
}, |
|
{ |
|
"dpo_loss": 0.533964216709137, |
|
"epoch": 4.846480869154464, |
|
"grad_norm": 6.460809600085337, |
|
"learning_rate": 1.2282406866966078e-08, |
|
"logits": -0.7880451679229736, |
|
"logps": -72.03130340576172, |
|
"loss": 0.033, |
|
"objective": 0.03257838636636734, |
|
"ranking_idealized": 0.7083333134651184, |
|
"ranking_idealized_expo": 0.5375000238418579, |
|
"ranking_simple": 0.7083333134651184, |
|
"regularize": 0.032578371465206146, |
|
"step": 855 |
|
}, |
|
{ |
|
"dpo_loss": 0.5133672952651978, |
|
"epoch": 4.874822862541333, |
|
"grad_norm": 6.904438097045362, |
|
"learning_rate": 7.863060120144316e-09, |
|
"logits": -0.7785756587982178, |
|
"logps": -71.80149841308594, |
|
"loss": 0.034, |
|
"objective": 0.03182807192206383, |
|
"ranking_idealized": 0.7166666388511658, |
|
"ranking_idealized_expo": 0.550000011920929, |
|
"ranking_simple": 0.7124999761581421, |
|
"regularize": 0.031828057020902634, |
|
"step": 860 |
|
}, |
|
{ |
|
"dpo_loss": 0.5214442014694214, |
|
"epoch": 4.903164855928201, |
|
"grad_norm": 6.578317256063193, |
|
"learning_rate": 4.423986410346526e-09, |
|
"logits": -0.8345638513565063, |
|
"logps": -70.81376647949219, |
|
"loss": 0.0332, |
|
"objective": 0.037510212510824203, |
|
"ranking_idealized": 0.6625000238418579, |
|
"ranking_idealized_expo": 0.4958333373069763, |
|
"ranking_simple": 0.6583333611488342, |
|
"regularize": 0.03751020506024361, |
|
"step": 865 |
|
}, |
|
{ |
|
"dpo_loss": 0.5174622535705566, |
|
"epoch": 4.931506849315069, |
|
"grad_norm": 6.93105315931173, |
|
"learning_rate": 1.9665384847583622e-09, |
|
"logits": -0.8424772024154663, |
|
"logps": -72.85356903076172, |
|
"loss": 0.0344, |
|
"objective": 0.0343938022851944, |
|
"ranking_idealized": 0.6916666626930237, |
|
"ranking_idealized_expo": 0.512499988079071, |
|
"ranking_simple": 0.6875, |
|
"regularize": 0.034393779933452606, |
|
"step": 870 |
|
}, |
|
{ |
|
"dpo_loss": 0.524601399898529, |
|
"epoch": 4.959848842701937, |
|
"grad_norm": 6.733993703356916, |
|
"learning_rate": 4.916829716183901e-10, |
|
"logits": -0.790997326374054, |
|
"logps": -72.49830627441406, |
|
"loss": 0.0325, |
|
"objective": 0.03138989955186844, |
|
"ranking_idealized": 0.6833333373069763, |
|
"ranking_idealized_expo": 0.5458333492279053, |
|
"ranking_simple": 0.675000011920929, |
|
"regularize": 0.03138989210128784, |
|
"step": 875 |
|
}, |
|
{ |
|
"dpo_loss": 0.5205584764480591, |
|
"epoch": 4.988190836088805, |
|
"grad_norm": 6.678618993937897, |
|
"learning_rate": 0.0, |
|
"logits": -0.8767089247703552, |
|
"logps": -72.85419464111328, |
|
"loss": 0.0328, |
|
"objective": 0.030487608164548874, |
|
"ranking_idealized": 0.6708333492279053, |
|
"ranking_idealized_expo": 0.5291666388511658, |
|
"ranking_simple": 0.6625000238418579, |
|
"regularize": 0.030487585812807083, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.988190836088805, |
|
"step": 880, |
|
"total_flos": 0.0, |
|
"train_loss": 0.15004684163088147, |
|
"train_runtime": 35354.4588, |
|
"train_samples_per_second": 7.185, |
|
"train_steps_per_second": 0.025 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|