|
{ |
|
"best_metric": 0.42165058851242065, |
|
"best_model_checkpoint": "/mnt/data/shesj/Trained/RL4CoT/DPO/Meta_13B_numglueCorrect_extend_10lang_v3longer.json/checkpoint-2000", |
|
"epoch": 0.5, |
|
"eval_steps": 100, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-08, |
|
"logits/chosen": -1.932613730430603, |
|
"logits/rejected": -1.9839212894439697, |
|
"logps/chosen": -21.470577239990234, |
|
"logps/rejected": -29.716251373291016, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.00021913403179496527, |
|
"rewards/margins": -0.000711185741238296, |
|
"rewards/rejected": 0.0009303201222792268, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-08, |
|
"logits/chosen": -1.9172611236572266, |
|
"logits/rejected": -1.9188216924667358, |
|
"logps/chosen": -26.2304744720459, |
|
"logps/rejected": -34.4053955078125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001622863463126123, |
|
"rewards/margins": 0.0004410705587361008, |
|
"rewards/rejected": 0.0011817925842478871, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3e-08, |
|
"logits/chosen": -1.8847434520721436, |
|
"logits/rejected": -1.9170100688934326, |
|
"logps/chosen": -27.089553833007812, |
|
"logps/rejected": -27.409826278686523, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.001969739096239209, |
|
"rewards/margins": -0.001115257851779461, |
|
"rewards/rejected": -0.0008544811280444264, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4e-08, |
|
"logits/chosen": -1.9766782522201538, |
|
"logits/rejected": -2.03544282913208, |
|
"logps/chosen": -24.145034790039062, |
|
"logps/rejected": -29.940826416015625, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0021944832988083363, |
|
"rewards/margins": -0.004052413627505302, |
|
"rewards/rejected": 0.001857930445112288, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -1.9266796112060547, |
|
"logits/rejected": -1.9567911624908447, |
|
"logps/chosen": -23.183719635009766, |
|
"logps/rejected": -30.98269271850586, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.000239828834310174, |
|
"rewards/margins": -0.0014836899936199188, |
|
"rewards/rejected": 0.0012438612757250667, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6e-08, |
|
"logits/chosen": -1.906097650527954, |
|
"logits/rejected": -1.9456329345703125, |
|
"logps/chosen": -24.585407257080078, |
|
"logps/rejected": -38.85862731933594, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0007270004716701806, |
|
"rewards/margins": -0.0001574301568325609, |
|
"rewards/rejected": 0.0008844308322295547, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.999999999999999e-08, |
|
"logits/chosen": -1.8719940185546875, |
|
"logits/rejected": -1.9031072854995728, |
|
"logps/chosen": -26.398326873779297, |
|
"logps/rejected": -26.643457412719727, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.00045877936645410955, |
|
"rewards/margins": 0.0018442294094711542, |
|
"rewards/rejected": -0.0023030086886137724, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8e-08, |
|
"logits/chosen": -1.8939685821533203, |
|
"logits/rejected": -1.9583580493927002, |
|
"logps/chosen": -25.56913185119629, |
|
"logps/rejected": -30.93756103515625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0020271444227546453, |
|
"rewards/margins": 0.0006519665475934744, |
|
"rewards/rejected": 0.0013751781079918146, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9e-08, |
|
"logits/chosen": -1.9265903234481812, |
|
"logits/rejected": -1.931652307510376, |
|
"logps/chosen": -22.316240310668945, |
|
"logps/rejected": -28.648876190185547, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.002130888169631362, |
|
"rewards/margins": -0.001213808893226087, |
|
"rewards/rejected": 0.0033446974121034145, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -1.8686259984970093, |
|
"logits/rejected": -1.9298280477523804, |
|
"logps/chosen": -23.921396255493164, |
|
"logps/rejected": -31.777599334716797, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.0009275235352106392, |
|
"rewards/margins": -0.0007253309595398605, |
|
"rewards/rejected": 0.001652854261919856, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.1e-07, |
|
"logits/chosen": -1.9727710485458374, |
|
"logits/rejected": -1.9917280673980713, |
|
"logps/chosen": -22.190906524658203, |
|
"logps/rejected": -31.58829116821289, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0005749320844188333, |
|
"rewards/margins": -0.0007338084396906197, |
|
"rewards/rejected": 0.000158876326167956, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.2e-07, |
|
"logits/chosen": -1.9801238775253296, |
|
"logits/rejected": -2.005495309829712, |
|
"logps/chosen": -27.862258911132812, |
|
"logps/rejected": -25.942520141601562, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0010454825824126601, |
|
"rewards/margins": -0.0008343329536728561, |
|
"rewards/rejected": 0.0018798153614625335, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3e-07, |
|
"logits/chosen": -1.9689757823944092, |
|
"logits/rejected": -1.9881080389022827, |
|
"logps/chosen": -21.018028259277344, |
|
"logps/rejected": -24.91114044189453, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.0009749111486598849, |
|
"rewards/margins": -0.0024081047158688307, |
|
"rewards/rejected": 0.0014331938000395894, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.3999999999999998e-07, |
|
"logits/chosen": -1.8362280130386353, |
|
"logits/rejected": -1.8990787267684937, |
|
"logps/chosen": -19.452341079711914, |
|
"logps/rejected": -31.218761444091797, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.001963708084076643, |
|
"rewards/margins": -0.0008558571571484208, |
|
"rewards/rejected": 0.002819565124809742, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.5e-07, |
|
"logits/chosen": -1.926926612854004, |
|
"logits/rejected": -1.923295259475708, |
|
"logps/chosen": -30.22551918029785, |
|
"logps/rejected": -29.960784912109375, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.002189194317907095, |
|
"rewards/margins": 0.00112381752114743, |
|
"rewards/rejected": 0.0010653762146830559, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.6e-07, |
|
"logits/chosen": -1.9044420719146729, |
|
"logits/rejected": -2.015061616897583, |
|
"logps/chosen": -21.34702491760254, |
|
"logps/rejected": -34.192962646484375, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0026284088380634785, |
|
"rewards/margins": 0.0029561687260866165, |
|
"rewards/rejected": -0.0003277602372691035, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.7e-07, |
|
"logits/chosen": -1.9384136199951172, |
|
"logits/rejected": -1.9547865390777588, |
|
"logps/chosen": -22.821643829345703, |
|
"logps/rejected": -27.573680877685547, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0042790137231349945, |
|
"rewards/margins": 0.003715975908562541, |
|
"rewards/rejected": 0.0005630379309877753, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.8e-07, |
|
"logits/chosen": -1.934216856956482, |
|
"logits/rejected": -1.9454669952392578, |
|
"logps/chosen": -25.801891326904297, |
|
"logps/rejected": -33.503868103027344, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.005253393668681383, |
|
"rewards/margins": 0.009739309549331665, |
|
"rewards/rejected": -0.004485915414988995, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.8999999999999998e-07, |
|
"logits/chosen": -1.9906108379364014, |
|
"logits/rejected": -1.982187271118164, |
|
"logps/chosen": -20.808486938476562, |
|
"logps/rejected": -22.990875244140625, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.007240566425025463, |
|
"rewards/margins": 0.009975696913897991, |
|
"rewards/rejected": -0.002735130488872528, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -1.9031318426132202, |
|
"logits/rejected": -1.9641430377960205, |
|
"logps/chosen": -27.743423461914062, |
|
"logps/rejected": -33.22098159790039, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.007883915677666664, |
|
"rewards/margins": 0.010012111626565456, |
|
"rewards/rejected": -0.002128196880221367, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -2.094839572906494, |
|
"eval_logits/rejected": -2.1590890884399414, |
|
"eval_logps/chosen": -27.199792861938477, |
|
"eval_logps/rejected": -33.886505126953125, |
|
"eval_loss": 0.6891224384307861, |
|
"eval_rewards/accuracies": 0.6281948685646057, |
|
"eval_rewards/chosen": 0.007588829845190048, |
|
"eval_rewards/margins": 0.007552009075880051, |
|
"eval_rewards/rejected": 3.682050737552345e-05, |
|
"eval_runtime": 620.6125, |
|
"eval_samples_per_second": 32.226, |
|
"eval_steps_per_second": 0.504, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9999658256641745e-07, |
|
"logits/chosen": -1.8730093240737915, |
|
"logits/rejected": -1.9344890117645264, |
|
"logps/chosen": -20.13459014892578, |
|
"logps/rejected": -33.617027282714844, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.00706650223582983, |
|
"rewards/margins": 0.0060449643060564995, |
|
"rewards/rejected": 0.0010215395595878363, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.999863304992469e-07, |
|
"logits/chosen": -1.9270174503326416, |
|
"logits/rejected": -1.9574453830718994, |
|
"logps/chosen": -27.71604347229004, |
|
"logps/rejected": -25.006996154785156, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006603191141039133, |
|
"rewards/margins": 0.008059519343078136, |
|
"rewards/rejected": -0.0014563293661922216, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9996924449920347e-07, |
|
"logits/chosen": -1.9630305767059326, |
|
"logits/rejected": -1.9570411443710327, |
|
"logps/chosen": -26.463726043701172, |
|
"logps/rejected": -32.061275482177734, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.014216710813343525, |
|
"rewards/margins": 0.009634643793106079, |
|
"rewards/rejected": 0.004582066088914871, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.999453257340926e-07, |
|
"logits/chosen": -1.8999840021133423, |
|
"logits/rejected": -1.9795329570770264, |
|
"logps/chosen": -25.98947525024414, |
|
"logps/rejected": -28.222991943359375, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.01323631964623928, |
|
"rewards/margins": 0.011925769969820976, |
|
"rewards/rejected": 0.00131055002566427, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9991457583873009e-07, |
|
"logits/chosen": -1.8996551036834717, |
|
"logits/rejected": -1.955409049987793, |
|
"logps/chosen": -22.584300994873047, |
|
"logps/rejected": -33.386985778808594, |
|
"loss": 0.6865, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.013754159212112427, |
|
"rewards/margins": 0.023814406245946884, |
|
"rewards/rejected": -0.010060247965157032, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9987699691483047e-07, |
|
"logits/chosen": -2.0506348609924316, |
|
"logits/rejected": -2.0362610816955566, |
|
"logps/chosen": -23.7604923248291, |
|
"logps/rejected": -26.14568519592285, |
|
"loss": 0.6857, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.00911510456353426, |
|
"rewards/margins": 0.018404189497232437, |
|
"rewards/rejected": -0.009289086796343327, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9983259153086325e-07, |
|
"logits/chosen": -1.9324474334716797, |
|
"logits/rejected": -1.9770278930664062, |
|
"logps/chosen": -22.019317626953125, |
|
"logps/rejected": -32.28319549560547, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.01499349158257246, |
|
"rewards/margins": 0.011077550239861012, |
|
"rewards/rejected": 0.003915940877050161, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9978136272187745e-07, |
|
"logits/chosen": -1.8848316669464111, |
|
"logits/rejected": -1.9437576532363892, |
|
"logps/chosen": -23.589109420776367, |
|
"logps/rejected": -35.33000946044922, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.023860031738877296, |
|
"rewards/margins": 0.032189734280109406, |
|
"rewards/rejected": -0.008329702541232109, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.997233139892941e-07, |
|
"logits/chosen": -1.9527661800384521, |
|
"logits/rejected": -1.9712032079696655, |
|
"logps/chosen": -26.96938705444336, |
|
"logps/rejected": -37.38798522949219, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.029867831617593765, |
|
"rewards/margins": 0.0479811429977417, |
|
"rewards/rejected": -0.018113311380147934, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9965844930066698e-07, |
|
"logits/chosen": -1.9537423849105835, |
|
"logits/rejected": -2.0033607482910156, |
|
"logps/chosen": -24.348556518554688, |
|
"logps/rejected": -33.808815002441406, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.020862484350800514, |
|
"rewards/margins": 0.031281448900699615, |
|
"rewards/rejected": -0.010418963618576527, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9958677308941136e-07, |
|
"logits/chosen": -1.8261839151382446, |
|
"logits/rejected": -1.9302173852920532, |
|
"logps/chosen": -29.987186431884766, |
|
"logps/rejected": -35.99931335449219, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.019242849200963974, |
|
"rewards/margins": 0.02805192768573761, |
|
"rewards/rejected": -0.008809077553451061, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9950829025450114e-07, |
|
"logits/chosen": -1.9289829730987549, |
|
"logits/rejected": -1.947072982788086, |
|
"logps/chosen": -28.187902450561523, |
|
"logps/rejected": -34.05443572998047, |
|
"loss": 0.6765, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.021738989278674126, |
|
"rewards/margins": 0.030259471386671066, |
|
"rewards/rejected": -0.008520480245351791, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9942300616013377e-07, |
|
"logits/chosen": -1.9248237609863281, |
|
"logits/rejected": -1.9657666683197021, |
|
"logps/chosen": -24.501365661621094, |
|
"logps/rejected": -35.636444091796875, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.027700508013367653, |
|
"rewards/margins": 0.04301094263792038, |
|
"rewards/rejected": -0.015310434624552727, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.993309266353638e-07, |
|
"logits/chosen": -1.9632354974746704, |
|
"logits/rejected": -2.003535747528076, |
|
"logps/chosen": -23.00834846496582, |
|
"logps/rejected": -35.10881805419922, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.022433171048760414, |
|
"rewards/margins": 0.03842338174581528, |
|
"rewards/rejected": -0.01599021628499031, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.992320579737045e-07, |
|
"logits/chosen": -1.924059510231018, |
|
"logits/rejected": -1.9485867023468018, |
|
"logps/chosen": -23.606395721435547, |
|
"logps/rejected": -34.96836471557617, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.020746735855937004, |
|
"rewards/margins": 0.03996802866458893, |
|
"rewards/rejected": -0.019221294671297073, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9912640693269751e-07, |
|
"logits/chosen": -1.8697850704193115, |
|
"logits/rejected": -1.8567920923233032, |
|
"logps/chosen": -21.75259017944336, |
|
"logps/rejected": -30.40744400024414, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.01639285869896412, |
|
"rewards/margins": 0.043617911636829376, |
|
"rewards/rejected": -0.027225052937865257, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9901398073345117e-07, |
|
"logits/chosen": -1.9471813440322876, |
|
"logits/rejected": -1.9353182315826416, |
|
"logps/chosen": -25.765697479248047, |
|
"logps/rejected": -29.618860244750977, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.02380688488483429, |
|
"rewards/margins": 0.049822211265563965, |
|
"rewards/rejected": -0.026015322655439377, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9889478706014683e-07, |
|
"logits/chosen": -1.9487874507904053, |
|
"logits/rejected": -2.0098330974578857, |
|
"logps/chosen": -21.570528030395508, |
|
"logps/rejected": -33.25171661376953, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.026225054636597633, |
|
"rewards/margins": 0.06671885401010513, |
|
"rewards/rejected": -0.04049379751086235, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9876883405951376e-07, |
|
"logits/chosen": -1.8715938329696655, |
|
"logits/rejected": -1.9319870471954346, |
|
"logps/chosen": -26.760677337646484, |
|
"logps/rejected": -33.19749069213867, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.023825686424970627, |
|
"rewards/margins": 0.051926322281360626, |
|
"rewards/rejected": -0.02810063399374485, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9863613034027222e-07, |
|
"logits/chosen": -1.8788840770721436, |
|
"logits/rejected": -1.8988853693008423, |
|
"logps/chosen": -25.21091079711914, |
|
"logps/rejected": -29.254589080810547, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.018495548516511917, |
|
"rewards/margins": 0.06201624125242233, |
|
"rewards/rejected": -0.04352068901062012, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.055267333984375, |
|
"eval_logits/rejected": -2.1217849254608154, |
|
"eval_logps/chosen": -27.035058975219727, |
|
"eval_logps/rejected": -34.23577880859375, |
|
"eval_loss": 0.6638627648353577, |
|
"eval_rewards/accuracies": 0.7012779712677002, |
|
"eval_rewards/chosen": 0.024062257260084152, |
|
"eval_rewards/margins": 0.05895264819264412, |
|
"eval_rewards/rejected": -0.03489040210843086, |
|
"eval_runtime": 624.7249, |
|
"eval_samples_per_second": 32.014, |
|
"eval_steps_per_second": 0.501, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9849668497254518e-07, |
|
"logits/chosen": -1.934955358505249, |
|
"logits/rejected": -1.962149977684021, |
|
"logps/chosen": -27.25844955444336, |
|
"logps/rejected": -34.96973419189453, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0293357465416193, |
|
"rewards/margins": 0.07454489171504974, |
|
"rewards/rejected": -0.045209143310785294, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9835050748723822e-07, |
|
"logits/chosen": -1.9069238901138306, |
|
"logits/rejected": -1.9731998443603516, |
|
"logps/chosen": -24.134227752685547, |
|
"logps/rejected": -35.1556282043457, |
|
"loss": 0.6578, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.034762006253004074, |
|
"rewards/margins": 0.08553215861320496, |
|
"rewards/rejected": -0.05077015236020088, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9819760787538837e-07, |
|
"logits/chosen": -1.871299147605896, |
|
"logits/rejected": -1.9281699657440186, |
|
"logps/chosen": -23.06464195251465, |
|
"logps/rejected": -31.450504302978516, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.022200025618076324, |
|
"rewards/margins": 0.08272843807935715, |
|
"rewards/rejected": -0.06052841991186142, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9803799658748093e-07, |
|
"logits/chosen": -1.8807361125946045, |
|
"logits/rejected": -1.9069770574569702, |
|
"logps/chosen": -30.42557716369629, |
|
"logps/rejected": -34.229042053222656, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.022107623517513275, |
|
"rewards/margins": 0.08593301475048065, |
|
"rewards/rejected": -0.06382538378238678, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9787168453273545e-07, |
|
"logits/chosen": -1.8091869354248047, |
|
"logits/rejected": -1.8331130743026733, |
|
"logps/chosen": -25.224321365356445, |
|
"logps/rejected": -30.4110107421875, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0017654001712799072, |
|
"rewards/margins": 0.052088379859924316, |
|
"rewards/rejected": -0.053853780031204224, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9769868307835993e-07, |
|
"logits/chosen": -1.9052034616470337, |
|
"logits/rejected": -1.948081612586975, |
|
"logps/chosen": -25.112337112426758, |
|
"logps/rejected": -33.975223541259766, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.021494796499609947, |
|
"rewards/margins": 0.09832977503538132, |
|
"rewards/rejected": -0.07683496922254562, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9751900404877398e-07, |
|
"logits/chosen": -1.9521013498306274, |
|
"logits/rejected": -2.0165581703186035, |
|
"logps/chosen": -24.376867294311523, |
|
"logps/rejected": -31.251483917236328, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.03034518100321293, |
|
"rewards/margins": 0.07310134917497635, |
|
"rewards/rejected": -0.04275617375969887, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9733265972480058e-07, |
|
"logits/chosen": -1.8078219890594482, |
|
"logits/rejected": -1.84963858127594, |
|
"logps/chosen": -26.859643936157227, |
|
"logps/rejected": -39.10490036010742, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02189042791724205, |
|
"rewards/margins": 0.10619709640741348, |
|
"rewards/rejected": -0.08430664986371994, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9713966284282674e-07, |
|
"logits/chosen": -1.8735325336456299, |
|
"logits/rejected": -1.8725016117095947, |
|
"logps/chosen": -26.303363800048828, |
|
"logps/rejected": -34.02263641357422, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.002572746481746435, |
|
"rewards/margins": 0.08564522862434387, |
|
"rewards/rejected": -0.08821798115968704, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9694002659393302e-07, |
|
"logits/chosen": -1.8189780712127686, |
|
"logits/rejected": -1.8682382106781006, |
|
"logps/chosen": -23.835880279541016, |
|
"logps/rejected": -33.8657112121582, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0005129946512170136, |
|
"rewards/margins": 0.1281592845916748, |
|
"rewards/rejected": -0.1276462972164154, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9673376462299182e-07, |
|
"logits/chosen": -1.8371633291244507, |
|
"logits/rejected": -1.9105684757232666, |
|
"logps/chosen": -21.575557708740234, |
|
"logps/rejected": -34.56597137451172, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.007006521336734295, |
|
"rewards/margins": 0.09751694649457932, |
|
"rewards/rejected": -0.09051042795181274, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9652089102773487e-07, |
|
"logits/chosen": -1.8369518518447876, |
|
"logits/rejected": -1.8599889278411865, |
|
"logps/chosen": -28.181167602539062, |
|
"logps/rejected": -31.22989273071289, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.025061314925551414, |
|
"rewards/margins": 0.1553947627544403, |
|
"rewards/rejected": -0.13033345341682434, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.963014203577896e-07, |
|
"logits/chosen": -1.8845443725585938, |
|
"logits/rejected": -1.8823344707489014, |
|
"logps/chosen": -29.47086524963379, |
|
"logps/rejected": -35.28539276123047, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.017991356551647186, |
|
"rewards/margins": 0.1316610425710678, |
|
"rewards/rejected": -0.11366970837116241, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9607536761368482e-07, |
|
"logits/chosen": -1.8176910877227783, |
|
"logits/rejected": -1.8355636596679688, |
|
"logps/chosen": -25.24968910217285, |
|
"logps/rejected": -33.39419174194336, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.005754109006375074, |
|
"rewards/margins": 0.15424194931983948, |
|
"rewards/rejected": -0.148487851023674, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9584274824582527e-07, |
|
"logits/chosen": -1.850232720375061, |
|
"logits/rejected": -1.9631538391113281, |
|
"logps/chosen": -24.343835830688477, |
|
"logps/rejected": -33.17558670043945, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.0063208164647221565, |
|
"rewards/margins": 0.12795674800872803, |
|
"rewards/rejected": -0.1342775821685791, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9560357815343574e-07, |
|
"logits/chosen": -1.855934739112854, |
|
"logits/rejected": -1.874148964881897, |
|
"logps/chosen": -22.502605438232422, |
|
"logps/rejected": -28.427841186523438, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.024706363677978516, |
|
"rewards/margins": 0.14013074338436127, |
|
"rewards/rejected": -0.1648370921611786, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9535787368347442e-07, |
|
"logits/chosen": -1.8721545934677124, |
|
"logits/rejected": -1.900692343711853, |
|
"logps/chosen": -25.19928550720215, |
|
"logps/rejected": -34.262535095214844, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.032768525183200836, |
|
"rewards/margins": 0.1182633638381958, |
|
"rewards/rejected": -0.15103188157081604, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9510565162951537e-07, |
|
"logits/chosen": -1.8483591079711914, |
|
"logits/rejected": -1.8752696514129639, |
|
"logps/chosen": -20.428386688232422, |
|
"logps/rejected": -36.55931854248047, |
|
"loss": 0.6365, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.036745864897966385, |
|
"rewards/margins": 0.11163260042667389, |
|
"rewards/rejected": -0.14837846159934998, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9484692923060094e-07, |
|
"logits/chosen": -1.8256515264511108, |
|
"logits/rejected": -1.9205400943756104, |
|
"logps/chosen": -23.362619400024414, |
|
"logps/rejected": -31.759166717529297, |
|
"loss": 0.6362, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.0479150153696537, |
|
"rewards/margins": 0.17140944302082062, |
|
"rewards/rejected": -0.21932446956634521, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9458172417006346e-07, |
|
"logits/chosen": -1.7794628143310547, |
|
"logits/rejected": -1.7937818765640259, |
|
"logps/chosen": -25.80463218688965, |
|
"logps/rejected": -35.583526611328125, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07188913971185684, |
|
"rewards/margins": 0.11425880342721939, |
|
"rewards/rejected": -0.18614795804023743, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_logits/chosen": -1.982098937034607, |
|
"eval_logits/rejected": -2.0512313842773438, |
|
"eval_logps/chosen": -27.691436767578125, |
|
"eval_logps/rejected": -35.85608673095703, |
|
"eval_loss": 0.6240853667259216, |
|
"eval_rewards/accuracies": 0.710463285446167, |
|
"eval_rewards/chosen": -0.041575513780117035, |
|
"eval_rewards/margins": 0.15534532070159912, |
|
"eval_rewards/rejected": -0.19692084193229675, |
|
"eval_runtime": 628.2953, |
|
"eval_samples_per_second": 31.832, |
|
"eval_steps_per_second": 0.498, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.943100545743165e-07, |
|
"logits/chosen": -1.760960340499878, |
|
"logits/rejected": -1.850353479385376, |
|
"logps/chosen": -21.75943374633789, |
|
"logps/rejected": -36.918704986572266, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.015559827908873558, |
|
"rewards/margins": 0.18496355414390564, |
|
"rewards/rejected": -0.20052340626716614, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9403193901161612e-07, |
|
"logits/chosen": -1.8915361166000366, |
|
"logits/rejected": -1.9225527048110962, |
|
"logps/chosen": -25.573026657104492, |
|
"logps/rejected": -33.04106903076172, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.014883624389767647, |
|
"rewards/margins": 0.21884708106517792, |
|
"rewards/rejected": -0.2337307184934616, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9374739649079154e-07, |
|
"logits/chosen": -1.7862402200698853, |
|
"logits/rejected": -1.8242515325546265, |
|
"logps/chosen": -22.60375213623047, |
|
"logps/rejected": -28.420673370361328, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.027039390057325363, |
|
"rewards/margins": 0.17555785179138184, |
|
"rewards/rejected": -0.2025972604751587, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9345644645994608e-07, |
|
"logits/chosen": -1.8331549167633057, |
|
"logits/rejected": -1.8311151266098022, |
|
"logps/chosen": -25.251590728759766, |
|
"logps/rejected": -35.493736267089844, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.04046057537198067, |
|
"rewards/margins": 0.22045457363128662, |
|
"rewards/rejected": -0.2609151601791382, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9315910880512788e-07, |
|
"logits/chosen": -1.797435998916626, |
|
"logits/rejected": -1.8667093515396118, |
|
"logps/chosen": -25.154064178466797, |
|
"logps/rejected": -32.20864486694336, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.055550407618284225, |
|
"rewards/margins": 0.21562273800373077, |
|
"rewards/rejected": -0.2711731493473053, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.928554038489707e-07, |
|
"logits/chosen": -1.6940425634384155, |
|
"logits/rejected": -1.744011640548706, |
|
"logps/chosen": -28.64211654663086, |
|
"logps/rejected": -36.03167724609375, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10549549013376236, |
|
"rewards/margins": 0.20552174746990204, |
|
"rewards/rejected": -0.3110172152519226, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9254535234930483e-07, |
|
"logits/chosen": -1.8530375957489014, |
|
"logits/rejected": -1.8774633407592773, |
|
"logps/chosen": -24.82217788696289, |
|
"logps/rejected": -34.68824005126953, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10517600923776627, |
|
"rewards/margins": 0.17466488480567932, |
|
"rewards/rejected": -0.279840886592865, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9222897549773846e-07, |
|
"logits/chosen": -1.7554054260253906, |
|
"logits/rejected": -1.7775160074234009, |
|
"logps/chosen": -33.513790130615234, |
|
"logps/rejected": -38.4843864440918, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.11450239270925522, |
|
"rewards/margins": 0.21290548145771027, |
|
"rewards/rejected": -0.3274078667163849, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9190629491820908e-07, |
|
"logits/chosen": -1.8356316089630127, |
|
"logits/rejected": -1.8637549877166748, |
|
"logps/chosen": -23.877962112426758, |
|
"logps/rejected": -37.12334060668945, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1105366125702858, |
|
"rewards/margins": 0.2586100697517395, |
|
"rewards/rejected": -0.3691467046737671, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9157733266550572e-07, |
|
"logits/chosen": -1.8048322200775146, |
|
"logits/rejected": -1.8302946090698242, |
|
"logps/chosen": -23.856672286987305, |
|
"logps/rejected": -29.770706176757812, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10654797405004501, |
|
"rewards/margins": 0.24383535981178284, |
|
"rewards/rejected": -0.35038334131240845, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9124211122376135e-07, |
|
"logits/chosen": -1.732420563697815, |
|
"logits/rejected": -1.8045669794082642, |
|
"logps/chosen": -27.87255859375, |
|
"logps/rejected": -47.80708694458008, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06705266982316971, |
|
"rewards/margins": 0.3158617913722992, |
|
"rewards/rejected": -0.3829144537448883, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9090065350491624e-07, |
|
"logits/chosen": -1.6546525955200195, |
|
"logits/rejected": -1.715736746788025, |
|
"logps/chosen": -23.659460067749023, |
|
"logps/rejected": -41.387020111083984, |
|
"loss": 0.5963, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.08905567228794098, |
|
"rewards/margins": 0.23344358801841736, |
|
"rewards/rejected": -0.32249927520751953, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.905529828471519e-07, |
|
"logits/chosen": -1.7011845111846924, |
|
"logits/rejected": -1.7666597366333008, |
|
"logps/chosen": -22.377338409423828, |
|
"logps/rejected": -40.995094299316406, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.052688825875520706, |
|
"rewards/margins": 0.33948010206222534, |
|
"rewards/rejected": -0.39216893911361694, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.901991230132959e-07, |
|
"logits/chosen": -1.810546875, |
|
"logits/rejected": -1.8701536655426025, |
|
"logps/chosen": -25.03759765625, |
|
"logps/rejected": -37.37371826171875, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04496913030743599, |
|
"rewards/margins": 0.305519163608551, |
|
"rewards/rejected": -0.3504883348941803, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8983909818919788e-07, |
|
"logits/chosen": -1.8299157619476318, |
|
"logits/rejected": -1.8482189178466797, |
|
"logps/chosen": -25.418384552001953, |
|
"logps/rejected": -38.091896057128906, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1421210616827011, |
|
"rewards/margins": 0.312614381313324, |
|
"rewards/rejected": -0.4547354280948639, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8947293298207635e-07, |
|
"logits/chosen": -1.6938432455062866, |
|
"logits/rejected": -1.7125260829925537, |
|
"logps/chosen": -23.936098098754883, |
|
"logps/rejected": -40.08384323120117, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.17097172141075134, |
|
"rewards/margins": 0.3229272663593292, |
|
"rewards/rejected": -0.49389901757240295, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8910065241883678e-07, |
|
"logits/chosen": -1.744314193725586, |
|
"logits/rejected": -1.8095916509628296, |
|
"logps/chosen": -29.93277359008789, |
|
"logps/rejected": -34.6977424621582, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.20626993477344513, |
|
"rewards/margins": 0.2772943675518036, |
|
"rewards/rejected": -0.48356422781944275, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8872228194436116e-07, |
|
"logits/chosen": -1.7233030796051025, |
|
"logits/rejected": -1.7732064723968506, |
|
"logps/chosen": -27.3117733001709, |
|
"logps/rejected": -28.547231674194336, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10007091611623764, |
|
"rewards/margins": 0.3294708728790283, |
|
"rewards/rejected": -0.42954176664352417, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8833784741976886e-07, |
|
"logits/chosen": -1.771653175354004, |
|
"logits/rejected": -1.793135404586792, |
|
"logps/chosen": -25.894977569580078, |
|
"logps/rejected": -33.55714797973633, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25736480951309204, |
|
"rewards/margins": 0.29228243231773376, |
|
"rewards/rejected": -0.5496472120285034, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8794737512064888e-07, |
|
"logits/chosen": -1.744651436805725, |
|
"logits/rejected": -1.8249235153198242, |
|
"logps/chosen": -31.24419593811035, |
|
"logps/rejected": -39.73127365112305, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.23053815960884094, |
|
"rewards/margins": 0.34528475999832153, |
|
"rewards/rejected": -0.5758228898048401, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -1.8933817148208618, |
|
"eval_logits/rejected": -1.9633389711380005, |
|
"eval_logps/chosen": -29.42821502685547, |
|
"eval_logps/rejected": -39.12174987792969, |
|
"eval_loss": 0.5772050023078918, |
|
"eval_rewards/accuracies": 0.7336261868476868, |
|
"eval_rewards/chosen": -0.2152535617351532, |
|
"eval_rewards/margins": 0.3082338869571686, |
|
"eval_rewards/rejected": -0.5234874486923218, |
|
"eval_runtime": 638.6461, |
|
"eval_samples_per_second": 31.316, |
|
"eval_steps_per_second": 0.49, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.875508917352643e-07, |
|
"logits/chosen": -1.7457950115203857, |
|
"logits/rejected": -1.7916702032089233, |
|
"logps/chosen": -28.520294189453125, |
|
"logps/rejected": -34.520931243896484, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.28079259395599365, |
|
"rewards/margins": 0.21762211620807648, |
|
"rewards/rejected": -0.49841469526290894, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.871484243627277e-07, |
|
"logits/chosen": -1.7049251794815063, |
|
"logits/rejected": -1.7206684350967407, |
|
"logps/chosen": -30.74674415588379, |
|
"logps/rejected": -39.634422302246094, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2962929606437683, |
|
"rewards/margins": 0.273542582988739, |
|
"rewards/rejected": -0.5698355436325073, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.867400005111495e-07, |
|
"logits/chosen": -1.771710991859436, |
|
"logits/rejected": -1.7792901992797852, |
|
"logps/chosen": -24.596803665161133, |
|
"logps/rejected": -34.267784118652344, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.27595609426498413, |
|
"rewards/margins": 0.3286646008491516, |
|
"rewards/rejected": -0.6046206951141357, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8632564809575738e-07, |
|
"logits/chosen": -1.6727784872055054, |
|
"logits/rejected": -1.763475775718689, |
|
"logps/chosen": -25.139801025390625, |
|
"logps/rejected": -46.42198944091797, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2259303778409958, |
|
"rewards/margins": 0.4986729025840759, |
|
"rewards/rejected": -0.7246032953262329, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.859053954369885e-07, |
|
"logits/chosen": -1.7414724826812744, |
|
"logits/rejected": -1.7918275594711304, |
|
"logps/chosen": -24.54043960571289, |
|
"logps/rejected": -50.85098648071289, |
|
"loss": 0.5723, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.23223881423473358, |
|
"rewards/margins": 0.47227388620376587, |
|
"rewards/rejected": -0.7045127153396606, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.854792712585539e-07, |
|
"logits/chosen": -1.645516037940979, |
|
"logits/rejected": -1.693722128868103, |
|
"logps/chosen": -25.945995330810547, |
|
"logps/rejected": -45.90827560424805, |
|
"loss": 0.5483, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.3240446448326111, |
|
"rewards/margins": 0.5380430221557617, |
|
"rewards/rejected": -0.8620877265930176, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8504730468547506e-07, |
|
"logits/chosen": -1.649815320968628, |
|
"logits/rejected": -1.6857585906982422, |
|
"logps/chosen": -23.51533317565918, |
|
"logps/rejected": -34.9762077331543, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3314105272293091, |
|
"rewards/margins": 0.3385566771030426, |
|
"rewards/rejected": -0.6699672341346741, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.846095252420935e-07, |
|
"logits/chosen": -1.6907812356948853, |
|
"logits/rejected": -1.6901006698608398, |
|
"logps/chosen": -26.697391510009766, |
|
"logps/rejected": -35.67798614501953, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.23831510543823242, |
|
"rewards/margins": 0.37860625982284546, |
|
"rewards/rejected": -0.6169213056564331, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.841659628500527e-07, |
|
"logits/chosen": -1.7292283773422241, |
|
"logits/rejected": -1.761568307876587, |
|
"logps/chosen": -31.302043914794922, |
|
"logps/rejected": -42.45463943481445, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.361013263463974, |
|
"rewards/margins": 0.4722154140472412, |
|
"rewards/rejected": -0.833228588104248, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8371664782625284e-07, |
|
"logits/chosen": -1.641015648841858, |
|
"logits/rejected": -1.6467399597167969, |
|
"logps/chosen": -28.206012725830078, |
|
"logps/rejected": -35.136924743652344, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2760232388973236, |
|
"rewards/margins": 0.4164581298828125, |
|
"rewards/rejected": -0.6924813985824585, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8326161088077904e-07, |
|
"logits/chosen": -1.6614574193954468, |
|
"logits/rejected": -1.7363828420639038, |
|
"logps/chosen": -22.726844787597656, |
|
"logps/rejected": -38.93574142456055, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.32301831245422363, |
|
"rewards/margins": 0.4177462160587311, |
|
"rewards/rejected": -0.7407644987106323, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.82800883114802e-07, |
|
"logits/chosen": -1.6789630651474, |
|
"logits/rejected": -1.7167510986328125, |
|
"logps/chosen": -27.532705307006836, |
|
"logps/rejected": -46.597877502441406, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.32634788751602173, |
|
"rewards/margins": 0.510971188545227, |
|
"rewards/rejected": -0.8373190760612488, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8233449601845256e-07, |
|
"logits/chosen": -1.6799752712249756, |
|
"logits/rejected": -1.746375322341919, |
|
"logps/chosen": -31.223033905029297, |
|
"logps/rejected": -39.73358917236328, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.41861018538475037, |
|
"rewards/margins": 0.43975359201431274, |
|
"rewards/rejected": -0.8583638072013855, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8186248146866925e-07, |
|
"logits/chosen": -1.7066395282745361, |
|
"logits/rejected": -1.7620627880096436, |
|
"logps/chosen": -27.414352416992188, |
|
"logps/rejected": -46.25200653076172, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.38034266233444214, |
|
"rewards/margins": 0.5347098112106323, |
|
"rewards/rejected": -0.9150525331497192, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8138487172701948e-07, |
|
"logits/chosen": -1.6645714044570923, |
|
"logits/rejected": -1.6783406734466553, |
|
"logps/chosen": -25.467668533325195, |
|
"logps/rejected": -42.95486831665039, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3398158550262451, |
|
"rewards/margins": 0.697130560874939, |
|
"rewards/rejected": -1.0369462966918945, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8090169943749475e-07, |
|
"logits/chosen": -1.717818021774292, |
|
"logits/rejected": -1.761845350265503, |
|
"logps/chosen": -26.850399017333984, |
|
"logps/rejected": -42.4122200012207, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5769149661064148, |
|
"rewards/margins": 0.5164484977722168, |
|
"rewards/rejected": -1.0933634042739868, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8041299762427914e-07, |
|
"logits/chosen": -1.5914769172668457, |
|
"logits/rejected": -1.6343708038330078, |
|
"logps/chosen": -28.54681396484375, |
|
"logps/rejected": -37.71539306640625, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3866100609302521, |
|
"rewards/margins": 0.4422999918460846, |
|
"rewards/rejected": -0.8289100527763367, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7991879968949247e-07, |
|
"logits/chosen": -1.6161174774169922, |
|
"logits/rejected": -1.6184766292572021, |
|
"logps/chosen": -34.267799377441406, |
|
"logps/rejected": -42.01969528198242, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5237308740615845, |
|
"rewards/margins": 0.4367442727088928, |
|
"rewards/rejected": -0.960474967956543, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.794191394109071e-07, |
|
"logits/chosen": -1.6576045751571655, |
|
"logits/rejected": -1.727998971939087, |
|
"logps/chosen": -30.341400146484375, |
|
"logps/rejected": -38.22698211669922, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4688238203525543, |
|
"rewards/margins": 0.46207141876220703, |
|
"rewards/rejected": -0.9308953285217285, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7891405093963936e-07, |
|
"logits/chosen": -1.6647958755493164, |
|
"logits/rejected": -1.6835273504257202, |
|
"logps/chosen": -33.16368865966797, |
|
"logps/rejected": -39.30922317504883, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5163825750350952, |
|
"rewards/margins": 0.40946492552757263, |
|
"rewards/rejected": -0.9258475303649902, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/chosen": -1.815245270729065, |
|
"eval_logits/rejected": -1.8842970132827759, |
|
"eval_logps/chosen": -32.26655578613281, |
|
"eval_logps/rejected": -44.04635238647461, |
|
"eval_loss": 0.5326652526855469, |
|
"eval_rewards/accuracies": 0.7432108521461487, |
|
"eval_rewards/chosen": -0.49908730387687683, |
|
"eval_rewards/margins": 0.5168607831001282, |
|
"eval_rewards/rejected": -1.0159480571746826, |
|
"eval_runtime": 642.3458, |
|
"eval_samples_per_second": 31.136, |
|
"eval_steps_per_second": 0.487, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7840356879781529e-07, |
|
"logits/chosen": -1.679195761680603, |
|
"logits/rejected": -1.7675142288208008, |
|
"logps/chosen": -35.0252799987793, |
|
"logps/rejected": -42.8774299621582, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5109128355979919, |
|
"rewards/margins": 0.5178115963935852, |
|
"rewards/rejected": -1.0287244319915771, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7788772787621125e-07, |
|
"logits/chosen": -1.5657098293304443, |
|
"logits/rejected": -1.6311728954315186, |
|
"logps/chosen": -25.362163543701172, |
|
"logps/rejected": -38.46403884887695, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.43067851662635803, |
|
"rewards/margins": 0.4838063716888428, |
|
"rewards/rejected": -0.9144848585128784, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7736656343186894e-07, |
|
"logits/chosen": -1.680122971534729, |
|
"logits/rejected": -1.6815965175628662, |
|
"logps/chosen": -32.139137268066406, |
|
"logps/rejected": -46.62664794921875, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.5889835357666016, |
|
"rewards/margins": 0.4796602725982666, |
|
"rewards/rejected": -1.0686438083648682, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.768401110856859e-07, |
|
"logits/chosen": -1.6242386102676392, |
|
"logits/rejected": -1.6613088846206665, |
|
"logps/chosen": -27.435977935791016, |
|
"logps/rejected": -44.428260803222656, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.4436129033565521, |
|
"rewards/margins": 0.7085938453674316, |
|
"rewards/rejected": -1.1522066593170166, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7630840681998066e-07, |
|
"logits/chosen": -1.6392037868499756, |
|
"logits/rejected": -1.6771599054336548, |
|
"logps/chosen": -30.593151092529297, |
|
"logps/rejected": -38.12665557861328, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5331975221633911, |
|
"rewards/margins": 0.5177993178367615, |
|
"rewards/rejected": -1.0509967803955078, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7577148697603348e-07, |
|
"logits/chosen": -1.653996229171753, |
|
"logits/rejected": -1.6901382207870483, |
|
"logps/chosen": -28.415035247802734, |
|
"logps/rejected": -43.633609771728516, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5270938873291016, |
|
"rewards/margins": 0.6642566919326782, |
|
"rewards/rejected": -1.1913506984710693, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7522938825160247e-07, |
|
"logits/chosen": -1.5846532583236694, |
|
"logits/rejected": -1.6490291357040405, |
|
"logps/chosen": -29.801273345947266, |
|
"logps/rejected": -42.26642990112305, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.44840726256370544, |
|
"rewards/margins": 0.6187388300895691, |
|
"rewards/rejected": -1.0671460628509521, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7468214769841538e-07, |
|
"logits/chosen": -1.68500554561615, |
|
"logits/rejected": -1.7133398056030273, |
|
"logps/chosen": -32.34290313720703, |
|
"logps/rejected": -42.54985046386719, |
|
"loss": 0.5033, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.6745347380638123, |
|
"rewards/margins": 0.5442879796028137, |
|
"rewards/rejected": -1.218822717666626, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7412980271963708e-07, |
|
"logits/chosen": -1.6446959972381592, |
|
"logits/rejected": -1.6709632873535156, |
|
"logps/chosen": -32.75804901123047, |
|
"logps/rejected": -41.7109260559082, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6608411073684692, |
|
"rewards/margins": 0.5818773508071899, |
|
"rewards/rejected": -1.2427183389663696, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7357239106731316e-07, |
|
"logits/chosen": -1.5884132385253906, |
|
"logits/rejected": -1.6537491083145142, |
|
"logps/chosen": -33.371700286865234, |
|
"logps/rejected": -49.154361724853516, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7718857526779175, |
|
"rewards/margins": 0.5803537368774414, |
|
"rewards/rejected": -1.3522393703460693, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7300995083978961e-07, |
|
"logits/chosen": -1.5982125997543335, |
|
"logits/rejected": -1.6282612085342407, |
|
"logps/chosen": -30.794885635375977, |
|
"logps/rejected": -46.758148193359375, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7337169051170349, |
|
"rewards/margins": 0.6327022314071655, |
|
"rewards/rejected": -1.3664191961288452, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.724425204791089e-07, |
|
"logits/chosen": -1.573806881904602, |
|
"logits/rejected": -1.6021778583526611, |
|
"logps/chosen": -32.741111755371094, |
|
"logps/rejected": -41.9412727355957, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6988576650619507, |
|
"rewards/margins": 0.574633777141571, |
|
"rewards/rejected": -1.273491382598877, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7187013876838238e-07, |
|
"logits/chosen": -1.633247971534729, |
|
"logits/rejected": -1.6215112209320068, |
|
"logps/chosen": -28.081262588500977, |
|
"logps/rejected": -45.24711227416992, |
|
"loss": 0.4877, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.5856736302375793, |
|
"rewards/margins": 0.7724601030349731, |
|
"rewards/rejected": -1.3581336736679077, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.712928448291397e-07, |
|
"logits/chosen": -1.654266357421875, |
|
"logits/rejected": -1.6817476749420166, |
|
"logps/chosen": -33.884037017822266, |
|
"logps/rejected": -48.278202056884766, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6435326337814331, |
|
"rewards/margins": 0.8194659948348999, |
|
"rewards/rejected": -1.462998628616333, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7071067811865473e-07, |
|
"logits/chosen": -1.6069905757904053, |
|
"logits/rejected": -1.6091623306274414, |
|
"logps/chosen": -32.96613311767578, |
|
"logps/rejected": -47.254539489746094, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7916733026504517, |
|
"rewards/margins": 0.6568180322647095, |
|
"rewards/rejected": -1.4484912157058716, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7012367842724884e-07, |
|
"logits/chosen": -1.5957623720169067, |
|
"logits/rejected": -1.5968074798583984, |
|
"logps/chosen": -26.651874542236328, |
|
"logps/rejected": -47.75643539428711, |
|
"loss": 0.4962, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7904036641120911, |
|
"rewards/margins": 0.7701337933540344, |
|
"rewards/rejected": -1.560537338256836, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.695318858755712e-07, |
|
"logits/chosen": -1.6403392553329468, |
|
"logits/rejected": -1.6463661193847656, |
|
"logps/chosen": -30.842029571533203, |
|
"logps/rejected": -45.244266510009766, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8566893339157104, |
|
"rewards/margins": 0.6294564008712769, |
|
"rewards/rejected": -1.4861459732055664, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6893534091185658e-07, |
|
"logits/chosen": -1.6482681035995483, |
|
"logits/rejected": -1.665355920791626, |
|
"logps/chosen": -31.461956024169922, |
|
"logps/rejected": -42.383148193359375, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7341334223747253, |
|
"rewards/margins": 0.5131980180740356, |
|
"rewards/rejected": -1.2473313808441162, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6833408430916082e-07, |
|
"logits/chosen": -1.6054385900497437, |
|
"logits/rejected": -1.6243665218353271, |
|
"logps/chosen": -27.294775009155273, |
|
"logps/rejected": -40.987762451171875, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7351181507110596, |
|
"rewards/margins": 0.7994521856307983, |
|
"rewards/rejected": -1.5345704555511475, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6772815716257412e-07, |
|
"logits/chosen": -1.59373140335083, |
|
"logits/rejected": -1.6416209936141968, |
|
"logps/chosen": -31.89492416381836, |
|
"logps/rejected": -47.964393615722656, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7752366065979004, |
|
"rewards/margins": 0.85821533203125, |
|
"rewards/rejected": -1.6334518194198608, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -1.7715307474136353, |
|
"eval_logits/rejected": -1.8386889696121216, |
|
"eval_logps/chosen": -35.16425704956055, |
|
"eval_logps/rejected": -49.16897201538086, |
|
"eval_loss": 0.4996708929538727, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -0.7888572812080383, |
|
"eval_rewards/margins": 0.739352285861969, |
|
"eval_rewards/rejected": -1.5282095670700073, |
|
"eval_runtime": 651.3576, |
|
"eval_samples_per_second": 30.705, |
|
"eval_steps_per_second": 0.481, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6711760088641197e-07, |
|
"logits/chosen": -1.601464033126831, |
|
"logits/rejected": -1.649553894996643, |
|
"logps/chosen": -32.55518341064453, |
|
"logps/rejected": -48.19125747680664, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.970151424407959, |
|
"rewards/margins": 0.6675958633422852, |
|
"rewards/rejected": -1.6377474069595337, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.665024572113848e-07, |
|
"logits/chosen": -1.6783527135849, |
|
"logits/rejected": -1.722983956336975, |
|
"logps/chosen": -35.63679885864258, |
|
"logps/rejected": -50.843589782714844, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8517330884933472, |
|
"rewards/margins": 0.754002034664154, |
|
"rewards/rejected": -1.605735182762146, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6588276818174578e-07, |
|
"logits/chosen": -1.605347990989685, |
|
"logits/rejected": -1.6674606800079346, |
|
"logps/chosen": -30.550785064697266, |
|
"logps/rejected": -53.16766357421875, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.7929040193557739, |
|
"rewards/margins": 0.869250476360321, |
|
"rewards/rejected": -1.6621545553207397, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6525857615241686e-07, |
|
"logits/chosen": -1.6046507358551025, |
|
"logits/rejected": -1.65438973903656, |
|
"logps/chosen": -37.572898864746094, |
|
"logps/rejected": -51.364967346191406, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8328970074653625, |
|
"rewards/margins": 0.8492236137390137, |
|
"rewards/rejected": -1.6821205615997314, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6462992378609406e-07, |
|
"logits/chosen": -1.600648283958435, |
|
"logits/rejected": -1.6318552494049072, |
|
"logps/chosen": -30.713436126708984, |
|
"logps/rejected": -50.46702194213867, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.8259468078613281, |
|
"rewards/margins": 0.843370258808136, |
|
"rewards/rejected": -1.6693168878555298, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6399685405033166e-07, |
|
"logits/chosen": -1.5601131916046143, |
|
"logits/rejected": -1.6074775457382202, |
|
"logps/chosen": -31.211395263671875, |
|
"logps/rejected": -45.20989227294922, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8995653986930847, |
|
"rewards/margins": 0.750219464302063, |
|
"rewards/rejected": -1.649784803390503, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6335941021460504e-07, |
|
"logits/chosen": -1.5442253351211548, |
|
"logits/rejected": -1.632916808128357, |
|
"logps/chosen": -31.537792205810547, |
|
"logps/rejected": -47.98058319091797, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9447064399719238, |
|
"rewards/margins": 0.827142596244812, |
|
"rewards/rejected": -1.7718490362167358, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.627176358473537e-07, |
|
"logits/chosen": -1.6395527124404907, |
|
"logits/rejected": -1.6777455806732178, |
|
"logps/chosen": -33.45917510986328, |
|
"logps/rejected": -52.585418701171875, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7429215312004089, |
|
"rewards/margins": 0.9078781008720398, |
|
"rewards/rejected": -1.6507995128631592, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6207157481300312e-07, |
|
"logits/chosen": -1.7033193111419678, |
|
"logits/rejected": -1.7614482641220093, |
|
"logps/chosen": -30.210342407226562, |
|
"logps/rejected": -49.7141227722168, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8633533716201782, |
|
"rewards/margins": 0.9977296590805054, |
|
"rewards/rejected": -1.8610830307006836, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.614212712689668e-07, |
|
"logits/chosen": -1.6111904382705688, |
|
"logits/rejected": -1.6214799880981445, |
|
"logps/chosen": -30.123058319091797, |
|
"logps/rejected": -46.51262664794922, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7468183636665344, |
|
"rewards/margins": 0.8520328402519226, |
|
"rewards/rejected": -1.598851203918457, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.607667696626281e-07, |
|
"logits/chosen": -1.5769963264465332, |
|
"logits/rejected": -1.642521858215332, |
|
"logps/chosen": -28.669681549072266, |
|
"logps/rejected": -44.05248260498047, |
|
"loss": 0.4719, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8426681756973267, |
|
"rewards/margins": 0.7874193787574768, |
|
"rewards/rejected": -1.6300876140594482, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.601081147283025e-07, |
|
"logits/chosen": -1.6030101776123047, |
|
"logits/rejected": -1.647033452987671, |
|
"logps/chosen": -31.833459854125977, |
|
"logps/rejected": -47.821189880371094, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.983397364616394, |
|
"rewards/margins": 0.9160215258598328, |
|
"rewards/rejected": -1.8994190692901611, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.594453514841798e-07, |
|
"logits/chosen": -1.6228950023651123, |
|
"logits/rejected": -1.6250451803207397, |
|
"logps/chosen": -33.878692626953125, |
|
"logps/rejected": -42.81220245361328, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8752596974372864, |
|
"rewards/margins": 0.6205729842185974, |
|
"rewards/rejected": -1.4958326816558838, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.5877852522924732e-07, |
|
"logits/chosen": -1.5953247547149658, |
|
"logits/rejected": -1.6342957019805908, |
|
"logps/chosen": -32.81747817993164, |
|
"logps/rejected": -63.63328170776367, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7662860155105591, |
|
"rewards/margins": 1.2044267654418945, |
|
"rewards/rejected": -1.9707129001617432, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.5810768154019382e-07, |
|
"logits/chosen": -1.570973515510559, |
|
"logits/rejected": -1.5865790843963623, |
|
"logps/chosen": -33.97815704345703, |
|
"logps/rejected": -49.3431396484375, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9393548965454102, |
|
"rewards/margins": 0.8772164583206177, |
|
"rewards/rejected": -1.8165714740753174, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.5743286626829435e-07, |
|
"logits/chosen": -1.6847432851791382, |
|
"logits/rejected": -1.703770399093628, |
|
"logps/chosen": -36.89822769165039, |
|
"logps/rejected": -53.3497428894043, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1248753070831299, |
|
"rewards/margins": 0.8558415174484253, |
|
"rewards/rejected": -1.9807169437408447, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.5675412553627636e-07, |
|
"logits/chosen": -1.5613644123077393, |
|
"logits/rejected": -1.569200873374939, |
|
"logps/chosen": -32.62022018432617, |
|
"logps/rejected": -50.714637756347656, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9382123947143555, |
|
"rewards/margins": 0.8733282089233398, |
|
"rewards/rejected": -1.8115408420562744, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.5607150573516727e-07, |
|
"logits/chosen": -1.6045001745224, |
|
"logits/rejected": -1.6578142642974854, |
|
"logps/chosen": -34.34115219116211, |
|
"logps/rejected": -49.66770553588867, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0255018472671509, |
|
"rewards/margins": 1.1245434284210205, |
|
"rewards/rejected": -2.150045156478882, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.5538505352112372e-07, |
|
"logits/chosen": -1.575278639793396, |
|
"logits/rejected": -1.6097004413604736, |
|
"logps/chosen": -30.383869171142578, |
|
"logps/rejected": -47.15066146850586, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8728625178337097, |
|
"rewards/margins": 0.9022108316421509, |
|
"rewards/rejected": -1.7750734090805054, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.546948158122427e-07, |
|
"logits/chosen": -1.5903997421264648, |
|
"logits/rejected": -1.6119863986968994, |
|
"logps/chosen": -37.304439544677734, |
|
"logps/rejected": -55.2794075012207, |
|
"loss": 0.4853, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9771662950515747, |
|
"rewards/margins": 0.9782206416130066, |
|
"rewards/rejected": -1.9553868770599365, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_logits/chosen": -1.759588360786438, |
|
"eval_logits/rejected": -1.8255704641342163, |
|
"eval_logps/chosen": -36.62710189819336, |
|
"eval_logps/rejected": -52.2473030090332, |
|
"eval_loss": 0.48141878843307495, |
|
"eval_rewards/accuracies": 0.7531948685646057, |
|
"eval_rewards/chosen": -0.9351421594619751, |
|
"eval_rewards/margins": 0.9009013175964355, |
|
"eval_rewards/rejected": -1.836043357849121, |
|
"eval_runtime": 648.2488, |
|
"eval_samples_per_second": 30.852, |
|
"eval_steps_per_second": 0.483, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.540008397853547e-07, |
|
"logits/chosen": -1.6052520275115967, |
|
"logits/rejected": -1.6869052648544312, |
|
"logps/chosen": -31.37619972229004, |
|
"logps/rejected": -50.86903762817383, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8874313235282898, |
|
"rewards/margins": 1.1413828134536743, |
|
"rewards/rejected": -2.0288138389587402, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.5330317287279937e-07, |
|
"logits/chosen": -1.6188942193984985, |
|
"logits/rejected": -1.6576578617095947, |
|
"logps/chosen": -27.7589054107666, |
|
"logps/rejected": -45.0649528503418, |
|
"loss": 0.4524, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8606212735176086, |
|
"rewards/margins": 0.9031599164009094, |
|
"rewards/rejected": -1.7637813091278076, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.526018627591834e-07, |
|
"logits/chosen": -1.6113197803497314, |
|
"logits/rejected": -1.6310360431671143, |
|
"logps/chosen": -31.3461971282959, |
|
"logps/rejected": -54.99705123901367, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0064165592193604, |
|
"rewards/margins": 1.113956093788147, |
|
"rewards/rejected": -2.1203725337982178, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.5189695737812152e-07, |
|
"logits/chosen": -1.5996081829071045, |
|
"logits/rejected": -1.6568666696548462, |
|
"logps/chosen": -40.4968147277832, |
|
"logps/rejected": -51.13838577270508, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1271008253097534, |
|
"rewards/margins": 0.8033941388130188, |
|
"rewards/rejected": -1.930495023727417, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.511885049089601e-07, |
|
"logits/chosen": -1.6359695196151733, |
|
"logits/rejected": -1.7165886163711548, |
|
"logps/chosen": -32.879817962646484, |
|
"logps/rejected": -48.68081283569336, |
|
"loss": 0.4635, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.958130955696106, |
|
"rewards/margins": 1.11000657081604, |
|
"rewards/rejected": -2.0681374073028564, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.5047655377348439e-07, |
|
"logits/chosen": -1.6750876903533936, |
|
"logits/rejected": -1.6934881210327148, |
|
"logps/chosen": -32.825477600097656, |
|
"logps/rejected": -48.85386276245117, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9325745701789856, |
|
"rewards/margins": 0.9969514608383179, |
|
"rewards/rejected": -1.9295259714126587, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.4976115263260874e-07, |
|
"logits/chosen": -1.5678603649139404, |
|
"logits/rejected": -1.661233901977539, |
|
"logps/chosen": -36.65790557861328, |
|
"logps/rejected": -48.384620666503906, |
|
"loss": 0.4576, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1775093078613281, |
|
"rewards/margins": 0.8282756805419922, |
|
"rewards/rejected": -2.0057852268218994, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.4904235038305082e-07, |
|
"logits/chosen": -1.6477429866790771, |
|
"logits/rejected": -1.6627209186553955, |
|
"logps/chosen": -34.94283676147461, |
|
"logps/rejected": -48.7259407043457, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1199615001678467, |
|
"rewards/margins": 0.6647182703018188, |
|
"rewards/rejected": -1.7846797704696655, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.483201961539896e-07, |
|
"logits/chosen": -1.619032859802246, |
|
"logits/rejected": -1.669075608253479, |
|
"logps/chosen": -30.97340965270996, |
|
"logps/rejected": -61.18779754638672, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.950448215007782, |
|
"rewards/margins": 1.3586184978485107, |
|
"rewards/rejected": -2.3090667724609375, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.4759473930370737e-07, |
|
"logits/chosen": -1.607339859008789, |
|
"logits/rejected": -1.595664620399475, |
|
"logps/chosen": -37.54076385498047, |
|
"logps/rejected": -58.147911071777344, |
|
"loss": 0.4578, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8425636291503906, |
|
"rewards/margins": 1.2238950729370117, |
|
"rewards/rejected": -2.0664587020874023, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.4686602941621615e-07, |
|
"logits/chosen": -1.6055225133895874, |
|
"logits/rejected": -1.6801090240478516, |
|
"logps/chosen": -39.49528884887695, |
|
"logps/rejected": -54.026329040527344, |
|
"loss": 0.4586, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2323989868164062, |
|
"rewards/margins": 0.6681613922119141, |
|
"rewards/rejected": -1.9005606174468994, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.4613411629786877e-07, |
|
"logits/chosen": -1.5876009464263916, |
|
"logits/rejected": -1.6064348220825195, |
|
"logps/chosen": -35.21834182739258, |
|
"logps/rejected": -57.62762451171875, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0678521394729614, |
|
"rewards/margins": 1.2525171041488647, |
|
"rewards/rejected": -2.320369243621826, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.4539904997395468e-07, |
|
"logits/chosen": -1.5777242183685303, |
|
"logits/rejected": -1.6244598627090454, |
|
"logps/chosen": -33.42559814453125, |
|
"logps/rejected": -52.55967330932617, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0470192432403564, |
|
"rewards/margins": 1.0231316089630127, |
|
"rewards/rejected": -2.070150852203369, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.4466088068528067e-07, |
|
"logits/chosen": -1.5677874088287354, |
|
"logits/rejected": -1.563103199005127, |
|
"logps/chosen": -31.043560028076172, |
|
"logps/rejected": -52.094017028808594, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8798549771308899, |
|
"rewards/margins": 1.1949479579925537, |
|
"rewards/rejected": -2.074802875518799, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.4391965888473702e-07, |
|
"logits/chosen": -1.5365984439849854, |
|
"logits/rejected": -1.5609190464019775, |
|
"logps/chosen": -38.239593505859375, |
|
"logps/rejected": -51.5609245300293, |
|
"loss": 0.4535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2842613458633423, |
|
"rewards/margins": 0.8575951457023621, |
|
"rewards/rejected": -2.1418566703796387, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.4317543523384928e-07, |
|
"logits/chosen": -1.522896647453308, |
|
"logits/rejected": -1.639601707458496, |
|
"logps/chosen": -32.27168273925781, |
|
"logps/rejected": -56.03171920776367, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2065473794937134, |
|
"rewards/margins": 1.2756444215774536, |
|
"rewards/rejected": -2.482191562652588, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.4242826059931536e-07, |
|
"logits/chosen": -1.6802387237548828, |
|
"logits/rejected": -1.691427230834961, |
|
"logps/chosen": -32.94379425048828, |
|
"logps/rejected": -50.2864875793457, |
|
"loss": 0.4789, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.9614617228507996, |
|
"rewards/margins": 1.0711755752563477, |
|
"rewards/rejected": -2.032637119293213, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.4167818604952903e-07, |
|
"logits/chosen": -1.6119060516357422, |
|
"logits/rejected": -1.6578251123428345, |
|
"logps/chosen": -34.777610778808594, |
|
"logps/rejected": -49.634010314941406, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0565636157989502, |
|
"rewards/margins": 0.7960433959960938, |
|
"rewards/rejected": -1.852607011795044, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.4092526285108939e-07, |
|
"logits/chosen": -1.712721824645996, |
|
"logits/rejected": -1.752832055091858, |
|
"logps/chosen": -37.20418930053711, |
|
"logps/rejected": -53.678688049316406, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1638964414596558, |
|
"rewards/margins": 1.0863351821899414, |
|
"rewards/rejected": -2.2502317428588867, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.4016954246529695e-07, |
|
"logits/chosen": -1.5309230089187622, |
|
"logits/rejected": -1.579245924949646, |
|
"logps/chosen": -35.23495864868164, |
|
"logps/rejected": -51.5868034362793, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0845030546188354, |
|
"rewards/margins": 1.109590768814087, |
|
"rewards/rejected": -2.194093704223633, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_logits/chosen": -1.756993055343628, |
|
"eval_logits/rejected": -1.8224340677261353, |
|
"eval_logps/chosen": -38.5142707824707, |
|
"eval_logps/rejected": -55.59178924560547, |
|
"eval_loss": 0.46481600403785706, |
|
"eval_rewards/accuracies": 0.7643769979476929, |
|
"eval_rewards/chosen": -1.1238588094711304, |
|
"eval_rewards/margins": 1.046633005142212, |
|
"eval_rewards/rejected": -2.170491933822632, |
|
"eval_runtime": 658.2706, |
|
"eval_samples_per_second": 30.383, |
|
"eval_steps_per_second": 0.475, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3941107654463616e-07, |
|
"logits/chosen": -1.6531871557235718, |
|
"logits/rejected": -1.6673660278320312, |
|
"logps/chosen": -31.110912322998047, |
|
"logps/rejected": -48.3098030090332, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9944551587104797, |
|
"rewards/margins": 1.0610458850860596, |
|
"rewards/rejected": -2.0555014610290527, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3864991692924522e-07, |
|
"logits/chosen": -1.5531246662139893, |
|
"logits/rejected": -1.653599500656128, |
|
"logps/chosen": -40.24319076538086, |
|
"logps/rejected": -53.15726852416992, |
|
"loss": 0.4374, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.204906702041626, |
|
"rewards/margins": 0.9076882600784302, |
|
"rewards/rejected": -2.1125950813293457, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3788611564337276e-07, |
|
"logits/chosen": -1.6369264125823975, |
|
"logits/rejected": -1.691960334777832, |
|
"logps/chosen": -35.385231018066406, |
|
"logps/rejected": -55.32622528076172, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.088341474533081, |
|
"rewards/margins": 1.0900700092315674, |
|
"rewards/rejected": -2.1784114837646484, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3711972489182207e-07, |
|
"logits/chosen": -1.6485437154769897, |
|
"logits/rejected": -1.6988540887832642, |
|
"logps/chosen": -32.4288215637207, |
|
"logps/rejected": -58.22150802612305, |
|
"loss": 0.4148, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.9628303647041321, |
|
"rewards/margins": 1.4620063304901123, |
|
"rewards/rejected": -2.4248366355895996, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.3635079705638297e-07, |
|
"logits/chosen": -1.5908691883087158, |
|
"logits/rejected": -1.5887458324432373, |
|
"logps/chosen": -33.48944854736328, |
|
"logps/rejected": -47.57482147216797, |
|
"loss": 0.4581, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.048614740371704, |
|
"rewards/margins": 1.045222282409668, |
|
"rewards/rejected": -2.093837261199951, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.3557938469225164e-07, |
|
"logits/chosen": -1.496455430984497, |
|
"logits/rejected": -1.565406084060669, |
|
"logps/chosen": -28.345775604248047, |
|
"logps/rejected": -48.28296661376953, |
|
"loss": 0.4276, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.8644731640815735, |
|
"rewards/margins": 1.2932841777801514, |
|
"rewards/rejected": -2.157757520675659, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.3480554052443843e-07, |
|
"logits/chosen": -1.6535274982452393, |
|
"logits/rejected": -1.6487712860107422, |
|
"logps/chosen": -37.31575012207031, |
|
"logps/rejected": -55.0163459777832, |
|
"loss": 0.4411, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.180275797843933, |
|
"rewards/margins": 0.9364711046218872, |
|
"rewards/rejected": -2.1167469024658203, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.340293174441643e-07, |
|
"logits/chosen": -1.5797991752624512, |
|
"logits/rejected": -1.6624667644500732, |
|
"logps/chosen": -35.370601654052734, |
|
"logps/rejected": -52.5462646484375, |
|
"loss": 0.435, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2066878080368042, |
|
"rewards/margins": 1.18440842628479, |
|
"rewards/rejected": -2.3910961151123047, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.332507685052457e-07, |
|
"logits/chosen": -1.5771610736846924, |
|
"logits/rejected": -1.6233526468276978, |
|
"logps/chosen": -36.874610900878906, |
|
"logps/rejected": -50.1643180847168, |
|
"loss": 0.4601, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0964281558990479, |
|
"rewards/margins": 1.0329301357269287, |
|
"rewards/rejected": -2.1293582916259766, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.3246994692046836e-07, |
|
"logits/chosen": -1.5860016345977783, |
|
"logits/rejected": -1.5693161487579346, |
|
"logps/chosen": -34.47091293334961, |
|
"logps/rejected": -58.84907913208008, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.114932656288147, |
|
"rewards/margins": 1.1820452213287354, |
|
"rewards/rejected": -2.2969777584075928, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.3168690605795043e-07, |
|
"logits/chosen": -1.5049854516983032, |
|
"logits/rejected": -1.5469257831573486, |
|
"logps/chosen": -39.44264602661133, |
|
"logps/rejected": -49.3149299621582, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0659205913543701, |
|
"rewards/margins": 1.319551944732666, |
|
"rewards/rejected": -2.3854727745056152, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.3090169943749475e-07, |
|
"logits/chosen": -1.595726728439331, |
|
"logits/rejected": -1.6204969882965088, |
|
"logps/chosen": -34.34868621826172, |
|
"logps/rejected": -64.38568115234375, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0860964059829712, |
|
"rewards/margins": 1.4995462894439697, |
|
"rewards/rejected": -2.5856425762176514, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.3011438072693074e-07, |
|
"logits/chosen": -1.602315902709961, |
|
"logits/rejected": -1.654049277305603, |
|
"logps/chosen": -33.55876541137695, |
|
"logps/rejected": -59.39331817626953, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0192234516143799, |
|
"rewards/margins": 1.5562570095062256, |
|
"rewards/rejected": -2.5754806995391846, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2932500373844649e-07, |
|
"logits/chosen": -1.5844796895980835, |
|
"logits/rejected": -1.641312837600708, |
|
"logps/chosen": -35.43933868408203, |
|
"logps/rejected": -52.629310607910156, |
|
"loss": 0.4199, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.1855111122131348, |
|
"rewards/margins": 1.2338942289352417, |
|
"rewards/rejected": -2.419405460357666, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2853362242491051e-07, |
|
"logits/chosen": -1.554743766784668, |
|
"logits/rejected": -1.5775463581085205, |
|
"logps/chosen": -38.14448928833008, |
|
"logps/rejected": -57.41911697387695, |
|
"loss": 0.4269, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0879703760147095, |
|
"rewards/margins": 1.3935130834579468, |
|
"rewards/rejected": -2.481483221054077, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2774029087618446e-07, |
|
"logits/chosen": -1.5769081115722656, |
|
"logits/rejected": -1.6464605331420898, |
|
"logps/chosen": -40.778114318847656, |
|
"logps/rejected": -62.757667541503906, |
|
"loss": 0.4533, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.490722417831421, |
|
"rewards/margins": 1.171587347984314, |
|
"rewards/rejected": -2.6623096466064453, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2694506331542577e-07, |
|
"logits/chosen": -1.572746992111206, |
|
"logits/rejected": -1.6023595333099365, |
|
"logps/chosen": -36.60691833496094, |
|
"logps/rejected": -61.279563903808594, |
|
"loss": 0.4322, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.055189609527588, |
|
"rewards/margins": 1.3352783918380737, |
|
"rewards/rejected": -2.390468120574951, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.2614799409538198e-07, |
|
"logits/chosen": -1.5945748090744019, |
|
"logits/rejected": -1.6221317052841187, |
|
"logps/chosen": -36.74169158935547, |
|
"logps/rejected": -55.456451416015625, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.1329044103622437, |
|
"rewards/margins": 1.147170066833496, |
|
"rewards/rejected": -2.2800745964050293, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.253491376946754e-07, |
|
"logits/chosen": -1.650987982749939, |
|
"logits/rejected": -1.6859050989151, |
|
"logps/chosen": -35.98708724975586, |
|
"logps/rejected": -60.302879333496094, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.0836153030395508, |
|
"rewards/margins": 1.4782553911209106, |
|
"rewards/rejected": -2.561870574951172, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.2454854871407992e-07, |
|
"logits/chosen": -1.6000125408172607, |
|
"logits/rejected": -1.6861261129379272, |
|
"logps/chosen": -37.93037796020508, |
|
"logps/rejected": -55.5538215637207, |
|
"loss": 0.4428, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2983626127243042, |
|
"rewards/margins": 1.157037615776062, |
|
"rewards/rejected": -2.455399990081787, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -1.754516839981079, |
|
"eval_logits/rejected": -1.8192579746246338, |
|
"eval_logps/chosen": -39.144981384277344, |
|
"eval_logps/rejected": -57.34225082397461, |
|
"eval_loss": 0.4540169835090637, |
|
"eval_rewards/accuracies": 0.7651757001876831, |
|
"eval_rewards/chosen": -1.1869295835494995, |
|
"eval_rewards/margins": 1.1586089134216309, |
|
"eval_rewards/rejected": -2.34553861618042, |
|
"eval_runtime": 661.1199, |
|
"eval_samples_per_second": 30.252, |
|
"eval_steps_per_second": 0.473, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.2374628187278885e-07, |
|
"logits/chosen": -1.6199579238891602, |
|
"logits/rejected": -1.6416032314300537, |
|
"logps/chosen": -36.601234436035156, |
|
"logps/rejected": -49.50715255737305, |
|
"loss": 0.4519, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2911183834075928, |
|
"rewards/margins": 0.9798983335494995, |
|
"rewards/rejected": -2.2710165977478027, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.2294239200467515e-07, |
|
"logits/chosen": -1.556757926940918, |
|
"logits/rejected": -1.586511254310608, |
|
"logps/chosen": -32.10668182373047, |
|
"logps/rejected": -58.393943786621094, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.089107871055603, |
|
"rewards/margins": 1.1830793619155884, |
|
"rewards/rejected": -2.2721872329711914, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.2213693405454345e-07, |
|
"logits/chosen": -1.7255284786224365, |
|
"logits/rejected": -1.718605399131775, |
|
"logps/chosen": -32.51628875732422, |
|
"logps/rejected": -49.17650604248047, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.175525426864624, |
|
"rewards/margins": 1.0937503576278687, |
|
"rewards/rejected": -2.2692759037017822, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.213299630743747e-07, |
|
"logits/chosen": -1.4932045936584473, |
|
"logits/rejected": -1.6324726343154907, |
|
"logps/chosen": -40.018192291259766, |
|
"logps/rejected": -57.08234786987305, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3543583154678345, |
|
"rewards/margins": 1.2706938982009888, |
|
"rewards/rejected": -2.625051975250244, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.205215342195634e-07, |
|
"logits/chosen": -1.6383206844329834, |
|
"logits/rejected": -1.6871874332427979, |
|
"logps/chosen": -33.45853805541992, |
|
"logps/rejected": -57.266441345214844, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0636919736862183, |
|
"rewards/margins": 1.4653905630111694, |
|
"rewards/rejected": -2.529082775115967, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.1971170274514802e-07, |
|
"logits/chosen": -1.6617015600204468, |
|
"logits/rejected": -1.7073558568954468, |
|
"logps/chosen": -34.81584930419922, |
|
"logps/rejected": -50.07064437866211, |
|
"loss": 0.4288, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.9755508303642273, |
|
"rewards/margins": 1.349270224571228, |
|
"rewards/rejected": -2.3248209953308105, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.1890052400203402e-07, |
|
"logits/chosen": -1.5495891571044922, |
|
"logits/rejected": -1.6336759328842163, |
|
"logps/chosen": -33.12566375732422, |
|
"logps/rejected": -51.78517532348633, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.1974151134490967, |
|
"rewards/margins": 1.0483092069625854, |
|
"rewards/rejected": -2.2457242012023926, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.18088053433211e-07, |
|
"logits/chosen": -1.528899908065796, |
|
"logits/rejected": -1.6124309301376343, |
|
"logps/chosen": -39.89997100830078, |
|
"logps/rejected": -61.714500427246094, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.9432302713394165, |
|
"rewards/margins": 1.5717684030532837, |
|
"rewards/rejected": -2.5149986743927, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1727434656996305e-07, |
|
"logits/chosen": -1.499908208847046, |
|
"logits/rejected": -1.5667937994003296, |
|
"logps/chosen": -35.76382827758789, |
|
"logps/rejected": -52.2210578918457, |
|
"loss": 0.437, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.2630939483642578, |
|
"rewards/margins": 0.9919009208679199, |
|
"rewards/rejected": -2.2549948692321777, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1645945902807339e-07, |
|
"logits/chosen": -1.4900109767913818, |
|
"logits/rejected": -1.5031189918518066, |
|
"logps/chosen": -37.20661926269531, |
|
"logps/rejected": -51.6440315246582, |
|
"loss": 0.4046, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1591427326202393, |
|
"rewards/margins": 0.9798423051834106, |
|
"rewards/rejected": -2.1389849185943604, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1564344650402309e-07, |
|
"logits/chosen": -1.6302865743637085, |
|
"logits/rejected": -1.716436743736267, |
|
"logps/chosen": -39.310768127441406, |
|
"logps/rejected": -60.70337677001953, |
|
"loss": 0.4087, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3054693937301636, |
|
"rewards/margins": 1.2337620258331299, |
|
"rewards/rejected": -2.539231538772583, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1482636477118419e-07, |
|
"logits/chosen": -1.5733920335769653, |
|
"logits/rejected": -1.6743791103363037, |
|
"logps/chosen": -32.88540267944336, |
|
"logps/rejected": -55.623626708984375, |
|
"loss": 0.4443, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.010662317276001, |
|
"rewards/margins": 1.533831000328064, |
|
"rewards/rejected": -2.5444931983947754, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1400826967600779e-07, |
|
"logits/chosen": -1.5256679058074951, |
|
"logits/rejected": -1.5824081897735596, |
|
"logps/chosen": -37.93513107299805, |
|
"logps/rejected": -60.231719970703125, |
|
"loss": 0.4454, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.357273817062378, |
|
"rewards/margins": 1.2650949954986572, |
|
"rewards/rejected": -2.6223690509796143, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.131892171342069e-07, |
|
"logits/chosen": -1.614967703819275, |
|
"logits/rejected": -1.67970871925354, |
|
"logps/chosen": -37.4019889831543, |
|
"logps/rejected": -57.1629638671875, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1967872381210327, |
|
"rewards/margins": 1.4661554098129272, |
|
"rewards/rejected": -2.662942409515381, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1236926312693478e-07, |
|
"logits/chosen": -1.6489760875701904, |
|
"logits/rejected": -1.6984256505966187, |
|
"logps/chosen": -37.733211517333984, |
|
"logps/rejected": -63.036476135253906, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.297151803970337, |
|
"rewards/margins": 1.3187490701675415, |
|
"rewards/rejected": -2.615900754928589, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1154846369695863e-07, |
|
"logits/chosen": -1.5639992952346802, |
|
"logits/rejected": -1.643088936805725, |
|
"logps/chosen": -41.88310241699219, |
|
"logps/rejected": -60.37211990356445, |
|
"loss": 0.4446, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3221181631088257, |
|
"rewards/margins": 1.4724633693695068, |
|
"rewards/rejected": -2.794581651687622, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.1072687494482918e-07, |
|
"logits/chosen": -1.548208236694336, |
|
"logits/rejected": -1.5871665477752686, |
|
"logps/chosen": -47.56840133666992, |
|
"logps/rejected": -66.3207778930664, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.517787218093872, |
|
"rewards/margins": 1.2745654582977295, |
|
"rewards/rejected": -2.7923526763916016, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.0990455302504628e-07, |
|
"logits/chosen": -1.5319437980651855, |
|
"logits/rejected": -1.6285107135772705, |
|
"logps/chosen": -42.794715881347656, |
|
"logps/rejected": -60.02912139892578, |
|
"loss": 0.4282, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3966975212097168, |
|
"rewards/margins": 1.354801893234253, |
|
"rewards/rejected": -2.7514994144439697, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.0908155414222082e-07, |
|
"logits/chosen": -1.6465413570404053, |
|
"logits/rejected": -1.647565484046936, |
|
"logps/chosen": -38.375003814697266, |
|
"logps/rejected": -55.00554275512695, |
|
"loss": 0.4371, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3204407691955566, |
|
"rewards/margins": 0.98493492603302, |
|
"rewards/rejected": -2.305375576019287, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.0825793454723325e-07, |
|
"logits/chosen": -1.5337989330291748, |
|
"logits/rejected": -1.6268227100372314, |
|
"logps/chosen": -31.162311553955078, |
|
"logps/rejected": -53.79045867919922, |
|
"loss": 0.4313, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9945358037948608, |
|
"rewards/margins": 1.6616699695587158, |
|
"rewards/rejected": -2.656205654144287, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_logits/chosen": -1.7576247453689575, |
|
"eval_logits/rejected": -1.8220233917236328, |
|
"eval_logps/chosen": -39.66193389892578, |
|
"eval_logps/rejected": -58.60415267944336, |
|
"eval_loss": 0.44551411271095276, |
|
"eval_rewards/accuracies": 0.7715654969215393, |
|
"eval_rewards/chosen": -1.2386250495910645, |
|
"eval_rewards/margins": 1.2331026792526245, |
|
"eval_rewards/rejected": -2.4717278480529785, |
|
"eval_runtime": 664.3994, |
|
"eval_samples_per_second": 30.102, |
|
"eval_steps_per_second": 0.471, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.0743375053338877e-07, |
|
"logits/chosen": -1.5790529251098633, |
|
"logits/rejected": -1.589842438697815, |
|
"logps/chosen": -34.36865997314453, |
|
"logps/rejected": -59.8324089050293, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.165611982345581, |
|
"rewards/margins": 1.1656267642974854, |
|
"rewards/rejected": -2.331238269805908, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.0660905843256993e-07, |
|
"logits/chosen": -1.5421807765960693, |
|
"logits/rejected": -1.5461338758468628, |
|
"logps/chosen": -34.33051300048828, |
|
"logps/rejected": -53.014427185058594, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.215490460395813, |
|
"rewards/margins": 1.2017762660980225, |
|
"rewards/rejected": -2.417266368865967, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.057839146113864e-07, |
|
"logits/chosen": -1.5130884647369385, |
|
"logits/rejected": -1.6016590595245361, |
|
"logps/chosen": -39.093894958496094, |
|
"logps/rejected": -51.003597259521484, |
|
"loss": 0.4505, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2024251222610474, |
|
"rewards/margins": 1.1259633302688599, |
|
"rewards/rejected": -2.3283886909484863, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.0495837546732223e-07, |
|
"logits/chosen": -1.665765404701233, |
|
"logits/rejected": -1.7019054889678955, |
|
"logps/chosen": -40.30259704589844, |
|
"logps/rejected": -61.30155563354492, |
|
"loss": 0.4353, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3531296253204346, |
|
"rewards/margins": 1.1265552043914795, |
|
"rewards/rejected": -2.479685068130493, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.0413249742488131e-07, |
|
"logits/chosen": -1.6483551263809204, |
|
"logits/rejected": -1.6976540088653564, |
|
"logps/chosen": -30.37175941467285, |
|
"logps/rejected": -57.36388397216797, |
|
"loss": 0.4403, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0984838008880615, |
|
"rewards/margins": 1.4797937870025635, |
|
"rewards/rejected": -2.578277349472046, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.033063369317308e-07, |
|
"logits/chosen": -1.6444858312606812, |
|
"logits/rejected": -1.7046945095062256, |
|
"logps/chosen": -39.15083312988281, |
|
"logps/rejected": -57.31549835205078, |
|
"loss": 0.4556, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.297633409500122, |
|
"rewards/margins": 1.1475791931152344, |
|
"rewards/rejected": -2.4452126026153564, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.0247995045484301e-07, |
|
"logits/chosen": -1.7556772232055664, |
|
"logits/rejected": -1.778540015220642, |
|
"logps/chosen": -32.11585998535156, |
|
"logps/rejected": -59.79661178588867, |
|
"loss": 0.4196, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0470155477523804, |
|
"rewards/margins": 1.7535333633422852, |
|
"rewards/rejected": -2.800549030303955, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.0165339447663586e-07, |
|
"logits/chosen": -1.621145248413086, |
|
"logits/rejected": -1.676640510559082, |
|
"logps/chosen": -41.72956466674805, |
|
"logps/rejected": -52.85761260986328, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3959109783172607, |
|
"rewards/margins": 1.1459295749664307, |
|
"rewards/rejected": -2.5418403148651123, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.0082672549111248e-07, |
|
"logits/chosen": -1.5826036930084229, |
|
"logits/rejected": -1.5808308124542236, |
|
"logps/chosen": -42.364906311035156, |
|
"logps/rejected": -63.37822341918945, |
|
"loss": 0.4357, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.297041654586792, |
|
"rewards/margins": 1.3919389247894287, |
|
"rewards/rejected": -2.6889805793762207, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -1.5667890310287476, |
|
"logits/rejected": -1.6108767986297607, |
|
"logps/chosen": -37.04503631591797, |
|
"logps/rejected": -70.06754302978516, |
|
"loss": 0.4323, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1212371587753296, |
|
"rewards/margins": 1.5776902437210083, |
|
"rewards/rejected": -2.698927402496338, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.917327450888751e-08, |
|
"logits/chosen": -1.7088003158569336, |
|
"logits/rejected": -1.7888380289077759, |
|
"logps/chosen": -33.05218505859375, |
|
"logps/rejected": -53.8926887512207, |
|
"loss": 0.3986, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9519890546798706, |
|
"rewards/margins": 1.2312923669815063, |
|
"rewards/rejected": -2.183281183242798, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.834660552336415e-08, |
|
"logits/chosen": -1.555781602859497, |
|
"logits/rejected": -1.5879541635513306, |
|
"logps/chosen": -35.17169189453125, |
|
"logps/rejected": -62.30256271362305, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.192533016204834, |
|
"rewards/margins": 1.6748558282852173, |
|
"rewards/rejected": -2.8673884868621826, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.752004954515699e-08, |
|
"logits/chosen": -1.5017266273498535, |
|
"logits/rejected": -1.581365704536438, |
|
"logps/chosen": -38.57265853881836, |
|
"logps/rejected": -59.8960075378418, |
|
"loss": 0.3938, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.2197906970977783, |
|
"rewards/margins": 1.3951218128204346, |
|
"rewards/rejected": -2.614912509918213, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.669366306826918e-08, |
|
"logits/chosen": -1.675785779953003, |
|
"logits/rejected": -1.7346951961517334, |
|
"logps/chosen": -33.21458053588867, |
|
"logps/rejected": -59.9798583984375, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.257973313331604, |
|
"rewards/margins": 1.5266462564468384, |
|
"rewards/rejected": -2.7846198081970215, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.586750257511866e-08, |
|
"logits/chosen": -1.592603325843811, |
|
"logits/rejected": -1.630692720413208, |
|
"logps/chosen": -36.74489974975586, |
|
"logps/rejected": -55.419898986816406, |
|
"loss": 0.4328, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3241074085235596, |
|
"rewards/margins": 1.336132287979126, |
|
"rewards/rejected": -2.6602396965026855, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.504162453267775e-08, |
|
"logits/chosen": -1.561734914779663, |
|
"logits/rejected": -1.6008018255233765, |
|
"logps/chosen": -33.03455352783203, |
|
"logps/rejected": -62.66986846923828, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.328235149383545, |
|
"rewards/margins": 1.3843605518341064, |
|
"rewards/rejected": -2.7125957012176514, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.421608538861361e-08, |
|
"logits/chosen": -1.6418508291244507, |
|
"logits/rejected": -1.7292274236679077, |
|
"logps/chosen": -37.394859313964844, |
|
"logps/rejected": -52.148902893066406, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2655750513076782, |
|
"rewards/margins": 1.1081774234771729, |
|
"rewards/rejected": -2.3737523555755615, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.339094156743006e-08, |
|
"logits/chosen": -1.6823419332504272, |
|
"logits/rejected": -1.7162498235702515, |
|
"logps/chosen": -33.99588394165039, |
|
"logps/rejected": -62.70172882080078, |
|
"loss": 0.3985, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.046633005142212, |
|
"rewards/margins": 1.7750266790390015, |
|
"rewards/rejected": -2.821659564971924, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.256624946661125e-08, |
|
"logits/chosen": -1.5356955528259277, |
|
"logits/rejected": -1.5542399883270264, |
|
"logps/chosen": -36.15153884887695, |
|
"logps/rejected": -60.288482666015625, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.113905668258667, |
|
"rewards/margins": 1.3887674808502197, |
|
"rewards/rejected": -2.5026729106903076, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.174206545276677e-08, |
|
"logits/chosen": -1.6465669870376587, |
|
"logits/rejected": -1.6484692096710205, |
|
"logps/chosen": -39.20000076293945, |
|
"logps/rejected": -67.43548583984375, |
|
"loss": 0.3895, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1534570455551147, |
|
"rewards/margins": 1.7428104877471924, |
|
"rewards/rejected": -2.8962674140930176, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_logits/chosen": -1.7623785734176636, |
|
"eval_logits/rejected": -1.827060341835022, |
|
"eval_logps/chosen": -40.319217681884766, |
|
"eval_logps/rejected": -59.921573638916016, |
|
"eval_loss": 0.43849822878837585, |
|
"eval_rewards/accuracies": 0.7719648480415344, |
|
"eval_rewards/chosen": -1.3043534755706787, |
|
"eval_rewards/margins": 1.2991164922714233, |
|
"eval_rewards/rejected": -2.6034700870513916, |
|
"eval_runtime": 670.9922, |
|
"eval_samples_per_second": 29.807, |
|
"eval_steps_per_second": 0.466, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.091844585777917e-08, |
|
"logits/chosen": -1.5940697193145752, |
|
"logits/rejected": -1.6277192831039429, |
|
"logps/chosen": -45.242149353027344, |
|
"logps/rejected": -59.017784118652344, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4398057460784912, |
|
"rewards/margins": 1.2071270942687988, |
|
"rewards/rejected": -2.646932601928711, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.009544697495372e-08, |
|
"logits/chosen": -1.5828858613967896, |
|
"logits/rejected": -1.636697769165039, |
|
"logps/chosen": -37.46663284301758, |
|
"logps/rejected": -55.285606384277344, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2059729099273682, |
|
"rewards/margins": 1.2867709398269653, |
|
"rewards/rejected": -2.492743968963623, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.927312505517084e-08, |
|
"logits/chosen": -1.528216004371643, |
|
"logits/rejected": -1.5571744441986084, |
|
"logps/chosen": -35.96453857421875, |
|
"logps/rejected": -49.5126838684082, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3253941535949707, |
|
"rewards/margins": 0.9808666110038757, |
|
"rewards/rejected": -2.3062610626220703, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.845153630304139e-08, |
|
"logits/chosen": -1.6100482940673828, |
|
"logits/rejected": -1.6352077722549438, |
|
"logps/chosen": -32.09465408325195, |
|
"logps/rejected": -59.7017822265625, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1275434494018555, |
|
"rewards/margins": 1.606652021408081, |
|
"rewards/rejected": -2.7341952323913574, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.763073687306523e-08, |
|
"logits/chosen": -1.5683681964874268, |
|
"logits/rejected": -1.5995924472808838, |
|
"logps/chosen": -35.26288604736328, |
|
"logps/rejected": -56.42836380004883, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.483015537261963, |
|
"rewards/margins": 1.1930134296417236, |
|
"rewards/rejected": -2.6760289669036865, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.68107828657931e-08, |
|
"logits/chosen": -1.515114426612854, |
|
"logits/rejected": -1.5763431787490845, |
|
"logps/chosen": -36.468536376953125, |
|
"logps/rejected": -65.7103042602539, |
|
"loss": 0.3842, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0401891469955444, |
|
"rewards/margins": 1.8647140264511108, |
|
"rewards/rejected": -2.9049034118652344, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.59917303239922e-08, |
|
"logits/chosen": -1.6495635509490967, |
|
"logits/rejected": -1.6883957386016846, |
|
"logps/chosen": -36.199073791503906, |
|
"logps/rejected": -55.47711944580078, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1502685546875, |
|
"rewards/margins": 1.4356176853179932, |
|
"rewards/rejected": -2.585886001586914, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.517363522881579e-08, |
|
"logits/chosen": -1.5667603015899658, |
|
"logits/rejected": -1.6185035705566406, |
|
"logps/chosen": -32.305355072021484, |
|
"logps/rejected": -60.237579345703125, |
|
"loss": 0.4161, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.119470238685608, |
|
"rewards/margins": 1.565954327583313, |
|
"rewards/rejected": -2.685424327850342, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.435655349597689e-08, |
|
"logits/chosen": -1.618090033531189, |
|
"logits/rejected": -1.671553373336792, |
|
"logps/chosen": -35.39004898071289, |
|
"logps/rejected": -64.5052261352539, |
|
"loss": 0.4255, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4270261526107788, |
|
"rewards/margins": 1.423475742340088, |
|
"rewards/rejected": -2.8505020141601562, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.354054097192658e-08, |
|
"logits/chosen": -1.5533418655395508, |
|
"logits/rejected": -1.6151001453399658, |
|
"logps/chosen": -41.16820526123047, |
|
"logps/rejected": -57.746246337890625, |
|
"loss": 0.413, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3642529249191284, |
|
"rewards/margins": 1.594090461730957, |
|
"rewards/rejected": -2.958343267440796, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.2725653430037e-08, |
|
"logits/chosen": -1.6192905902862549, |
|
"logits/rejected": -1.6287028789520264, |
|
"logps/chosen": -39.75355911254883, |
|
"logps/rejected": -64.3330078125, |
|
"loss": 0.4372, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.475280523300171, |
|
"rewards/margins": 1.366707444190979, |
|
"rewards/rejected": -2.8419878482818604, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.191194656678904e-08, |
|
"logits/chosen": -1.6061389446258545, |
|
"logits/rejected": -1.6677024364471436, |
|
"logps/chosen": -38.45769500732422, |
|
"logps/rejected": -53.96708297729492, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.3363707065582275, |
|
"rewards/margins": 1.3108900785446167, |
|
"rewards/rejected": -2.647261142730713, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.109947599796598e-08, |
|
"logits/chosen": -1.563370943069458, |
|
"logits/rejected": -1.6023693084716797, |
|
"logps/chosen": -35.938392639160156, |
|
"logps/rejected": -66.66423797607422, |
|
"loss": 0.3955, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.204688549041748, |
|
"rewards/margins": 1.7450326681137085, |
|
"rewards/rejected": -2.949721097946167, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.028829725485198e-08, |
|
"logits/chosen": -1.5231170654296875, |
|
"logits/rejected": -1.5746376514434814, |
|
"logps/chosen": -42.712867736816406, |
|
"logps/rejected": -58.072288513183594, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.479255199432373, |
|
"rewards/margins": 1.2070258855819702, |
|
"rewards/rejected": -2.686281204223633, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.947846578043658e-08, |
|
"logits/chosen": -1.6910524368286133, |
|
"logits/rejected": -1.7825229167938232, |
|
"logps/chosen": -35.068939208984375, |
|
"logps/rejected": -63.149635314941406, |
|
"loss": 0.4065, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.9734908938407898, |
|
"rewards/margins": 1.701172113418579, |
|
"rewards/rejected": -2.6746630668640137, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.867003692562532e-08, |
|
"logits/chosen": -1.6070430278778076, |
|
"logits/rejected": -1.647657036781311, |
|
"logps/chosen": -35.467254638671875, |
|
"logps/rejected": -51.75764846801758, |
|
"loss": 0.42, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4265575408935547, |
|
"rewards/margins": 1.182373285293579, |
|
"rewards/rejected": -2.608931064605713, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.786306594545656e-08, |
|
"logits/chosen": -1.543676733970642, |
|
"logits/rejected": -1.613452672958374, |
|
"logps/chosen": -34.50806427001953, |
|
"logps/rejected": -49.17207336425781, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3245508670806885, |
|
"rewards/margins": 1.224057912826538, |
|
"rewards/rejected": -2.5486085414886475, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.705760799532485e-08, |
|
"logits/chosen": -1.6542270183563232, |
|
"logits/rejected": -1.676404595375061, |
|
"logps/chosen": -38.976356506347656, |
|
"logps/rejected": -61.9526481628418, |
|
"loss": 0.4308, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3091213703155518, |
|
"rewards/margins": 1.3484121561050415, |
|
"rewards/rejected": -2.6575334072113037, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.625371812721114e-08, |
|
"logits/chosen": -1.6496282815933228, |
|
"logits/rejected": -1.686861276626587, |
|
"logps/chosen": -43.114749908447266, |
|
"logps/rejected": -58.412376403808594, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4000921249389648, |
|
"rewards/margins": 1.3142130374908447, |
|
"rewards/rejected": -2.7143054008483887, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.545145128592009e-08, |
|
"logits/chosen": -1.6392017602920532, |
|
"logits/rejected": -1.659065842628479, |
|
"logps/chosen": -34.54826736450195, |
|
"logps/rejected": -56.08115768432617, |
|
"loss": 0.4386, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3348114490509033, |
|
"rewards/margins": 1.3222072124481201, |
|
"rewards/rejected": -2.6570186614990234, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_logits/chosen": -1.7654143571853638, |
|
"eval_logits/rejected": -1.8298496007919312, |
|
"eval_logps/chosen": -40.549720764160156, |
|
"eval_logps/rejected": -60.5917854309082, |
|
"eval_loss": 0.43321406841278076, |
|
"eval_rewards/accuracies": 0.7775558829307556, |
|
"eval_rewards/chosen": -1.3274036645889282, |
|
"eval_rewards/margins": 1.3430880308151245, |
|
"eval_rewards/rejected": -2.6704916954040527, |
|
"eval_runtime": 672.9903, |
|
"eval_samples_per_second": 29.718, |
|
"eval_steps_per_second": 0.465, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.465086230532459e-08, |
|
"logits/chosen": -1.7243528366088867, |
|
"logits/rejected": -1.745429277420044, |
|
"logps/chosen": -35.07295608520508, |
|
"logps/rejected": -57.85649871826172, |
|
"loss": 0.4251, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1319130659103394, |
|
"rewards/margins": 1.6454994678497314, |
|
"rewards/rejected": -2.7774124145507812, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.385200590461802e-08, |
|
"logits/chosen": -1.577544927597046, |
|
"logits/rejected": -1.5696823596954346, |
|
"logps/chosen": -34.15651321411133, |
|
"logps/rejected": -66.87596130371094, |
|
"loss": 0.4143, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.263463020324707, |
|
"rewards/margins": 1.7731821537017822, |
|
"rewards/rejected": -3.0366451740264893, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.305493668457419e-08, |
|
"logits/chosen": -1.5483906269073486, |
|
"logits/rejected": -1.611096978187561, |
|
"logps/chosen": -34.1319580078125, |
|
"logps/rejected": -58.55057907104492, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.100731372833252, |
|
"rewards/margins": 1.511641263961792, |
|
"rewards/rejected": -2.612372636795044, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.225970912381556e-08, |
|
"logits/chosen": -1.5925017595291138, |
|
"logits/rejected": -1.6905170679092407, |
|
"logps/chosen": -35.592220306396484, |
|
"logps/rejected": -57.27701950073242, |
|
"loss": 0.3736, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.184050440788269, |
|
"rewards/margins": 1.6517807245254517, |
|
"rewards/rejected": -2.8358309268951416, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.146637757508949e-08, |
|
"logits/chosen": -1.5678200721740723, |
|
"logits/rejected": -1.6304035186767578, |
|
"logps/chosen": -33.75212860107422, |
|
"logps/rejected": -57.301422119140625, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9995731115341187, |
|
"rewards/margins": 1.5843132734298706, |
|
"rewards/rejected": -2.5838866233825684, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.067499626155353e-08, |
|
"logits/chosen": -1.426163911819458, |
|
"logits/rejected": -1.4889883995056152, |
|
"logps/chosen": -40.3751106262207, |
|
"logps/rejected": -62.611167907714844, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.4146851301193237, |
|
"rewards/margins": 1.500084400177002, |
|
"rewards/rejected": -2.914769411087036, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.988561927306926e-08, |
|
"logits/chosen": -1.6196644306182861, |
|
"logits/rejected": -1.6350748538970947, |
|
"logps/chosen": -34.81641387939453, |
|
"logps/rejected": -60.31574630737305, |
|
"loss": 0.387, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.2771868705749512, |
|
"rewards/margins": 1.5419944524765015, |
|
"rewards/rejected": -2.819181203842163, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.909830056250527e-08, |
|
"logits/chosen": -1.574197769165039, |
|
"logits/rejected": -1.6305344104766846, |
|
"logps/chosen": -41.36061096191406, |
|
"logps/rejected": -59.025543212890625, |
|
"loss": 0.4207, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.610036849975586, |
|
"rewards/margins": 1.2757810354232788, |
|
"rewards/rejected": -2.885817766189575, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.831309394204956e-08, |
|
"logits/chosen": -1.4915789365768433, |
|
"logits/rejected": -1.5080435276031494, |
|
"logps/chosen": -33.570152282714844, |
|
"logps/rejected": -60.77013397216797, |
|
"loss": 0.4148, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.416123390197754, |
|
"rewards/margins": 1.3803520202636719, |
|
"rewards/rejected": -2.7964751720428467, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.753005307953166e-08, |
|
"logits/chosen": -1.5550016164779663, |
|
"logits/rejected": -1.5937179327011108, |
|
"logps/chosen": -35.67682647705078, |
|
"logps/rejected": -50.56844711303711, |
|
"loss": 0.3835, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.2889750003814697, |
|
"rewards/margins": 1.1508995294570923, |
|
"rewards/rejected": -2.4398744106292725, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.674923149475432e-08, |
|
"logits/chosen": -1.6518977880477905, |
|
"logits/rejected": -1.7234160900115967, |
|
"logps/chosen": -34.21056365966797, |
|
"logps/rejected": -57.03264617919922, |
|
"loss": 0.4093, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.2470835447311401, |
|
"rewards/margins": 1.5973832607269287, |
|
"rewards/rejected": -2.8444666862487793, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.597068255583569e-08, |
|
"logits/chosen": -1.5592567920684814, |
|
"logits/rejected": -1.6327491998672485, |
|
"logps/chosen": -38.95235061645508, |
|
"logps/rejected": -53.28133010864258, |
|
"loss": 0.4692, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4167959690093994, |
|
"rewards/margins": 1.2280877828598022, |
|
"rewards/rejected": -2.644883632659912, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.519445947556154e-08, |
|
"logits/chosen": -1.609717607498169, |
|
"logits/rejected": -1.6457746028900146, |
|
"logps/chosen": -37.1761360168457, |
|
"logps/rejected": -56.73893356323242, |
|
"loss": 0.4155, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2005279064178467, |
|
"rewards/margins": 1.3979219198226929, |
|
"rewards/rejected": -2.59844970703125, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.442061530774834e-08, |
|
"logits/chosen": -1.547778844833374, |
|
"logits/rejected": -1.6019436120986938, |
|
"logps/chosen": -39.04016876220703, |
|
"logps/rejected": -57.7167854309082, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.386942744255066, |
|
"rewards/margins": 1.4044148921966553, |
|
"rewards/rejected": -2.7913577556610107, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.3649202943617e-08, |
|
"logits/chosen": -1.541163444519043, |
|
"logits/rejected": -1.6102149486541748, |
|
"logps/chosen": -36.81908416748047, |
|
"logps/rejected": -59.110313415527344, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.454079031944275, |
|
"rewards/margins": 1.5748546123504639, |
|
"rewards/rejected": -3.0289340019226074, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.288027510817791e-08, |
|
"logits/chosen": -1.652465581893921, |
|
"logits/rejected": -1.6677935123443604, |
|
"logps/chosen": -33.92074966430664, |
|
"logps/rejected": -49.562801361083984, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2316993474960327, |
|
"rewards/margins": 1.157551884651184, |
|
"rewards/rejected": -2.389251232147217, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.211388435662721e-08, |
|
"logits/chosen": -1.627846121788025, |
|
"logits/rejected": -1.6586716175079346, |
|
"logps/chosen": -34.919586181640625, |
|
"logps/rejected": -57.35619354248047, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1946351528167725, |
|
"rewards/margins": 1.3707973957061768, |
|
"rewards/rejected": -2.565432548522949, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.135008307075479e-08, |
|
"logits/chosen": -1.5772037506103516, |
|
"logits/rejected": -1.6464507579803467, |
|
"logps/chosen": -35.31485366821289, |
|
"logps/rejected": -54.949005126953125, |
|
"loss": 0.4616, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2055904865264893, |
|
"rewards/margins": 1.443077802658081, |
|
"rewards/rejected": -2.6486685276031494, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.058892345536387e-08, |
|
"logits/chosen": -1.5377371311187744, |
|
"logits/rejected": -1.5599898099899292, |
|
"logps/chosen": -41.80292510986328, |
|
"logps/rejected": -53.32208251953125, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4467737674713135, |
|
"rewards/margins": 1.1260101795196533, |
|
"rewards/rejected": -2.5727837085723877, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.983045753470308e-08, |
|
"logits/chosen": -1.6316235065460205, |
|
"logits/rejected": -1.6944658756256104, |
|
"logps/chosen": -33.74870681762695, |
|
"logps/rejected": -61.187347412109375, |
|
"loss": 0.4035, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3623381853103638, |
|
"rewards/margins": 1.4390232563018799, |
|
"rewards/rejected": -2.801361560821533, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_logits/chosen": -1.7694017887115479, |
|
"eval_logits/rejected": -1.8335498571395874, |
|
"eval_logps/chosen": -40.61641311645508, |
|
"eval_logps/rejected": -60.981990814208984, |
|
"eval_loss": 0.4293850064277649, |
|
"eval_rewards/accuracies": 0.7811501622200012, |
|
"eval_rewards/chosen": -1.3340733051300049, |
|
"eval_rewards/margins": 1.3754388093948364, |
|
"eval_rewards/rejected": -2.7095119953155518, |
|
"eval_runtime": 677.4118, |
|
"eval_samples_per_second": 29.524, |
|
"eval_steps_per_second": 0.462, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.9074737148910606e-08, |
|
"logits/chosen": -1.6358497142791748, |
|
"logits/rejected": -1.6877262592315674, |
|
"logps/chosen": -33.53594207763672, |
|
"logps/rejected": -52.47662353515625, |
|
"loss": 0.4214, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1719892024993896, |
|
"rewards/margins": 1.2264606952667236, |
|
"rewards/rejected": -2.398449659347534, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.832181395047098e-08, |
|
"logits/chosen": -1.5000022649765015, |
|
"logits/rejected": -1.5386083126068115, |
|
"logps/chosen": -34.92070770263672, |
|
"logps/rejected": -61.40190887451172, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2568442821502686, |
|
"rewards/margins": 1.5954145193099976, |
|
"rewards/rejected": -2.8522589206695557, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.7571739400684635e-08, |
|
"logits/chosen": -1.5450022220611572, |
|
"logits/rejected": -1.6439878940582275, |
|
"logps/chosen": -36.46127700805664, |
|
"logps/rejected": -57.535743713378906, |
|
"loss": 0.4263, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1943490505218506, |
|
"rewards/margins": 1.5856481790542603, |
|
"rewards/rejected": -2.7799973487854004, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.682456476615072e-08, |
|
"logits/chosen": -1.5656230449676514, |
|
"logits/rejected": -1.6265404224395752, |
|
"logps/chosen": -37.38570022583008, |
|
"logps/rejected": -59.56365966796875, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4264448881149292, |
|
"rewards/margins": 1.2477037906646729, |
|
"rewards/rejected": -2.6741487979888916, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.6080341115262976e-08, |
|
"logits/chosen": -1.6068060398101807, |
|
"logits/rejected": -1.656057357788086, |
|
"logps/chosen": -39.901092529296875, |
|
"logps/rejected": -60.206336975097656, |
|
"loss": 0.4312, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5584208965301514, |
|
"rewards/margins": 1.1731221675872803, |
|
"rewards/rejected": -2.7315430641174316, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.533911931471935e-08, |
|
"logits/chosen": -1.607656478881836, |
|
"logits/rejected": -1.6602356433868408, |
|
"logps/chosen": -35.008094787597656, |
|
"logps/rejected": -58.70320510864258, |
|
"loss": 0.4371, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.375211477279663, |
|
"rewards/margins": 1.3568847179412842, |
|
"rewards/rejected": -2.7320961952209473, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5.460095002604532e-08, |
|
"logits/chosen": -1.5863715410232544, |
|
"logits/rejected": -1.5841939449310303, |
|
"logps/chosen": -37.74492263793945, |
|
"logps/rejected": -61.49238967895508, |
|
"loss": 0.4058, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1969599723815918, |
|
"rewards/margins": 1.476892352104187, |
|
"rewards/rejected": -2.6738524436950684, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.386588370213123e-08, |
|
"logits/chosen": -1.6613683700561523, |
|
"logits/rejected": -1.6925634145736694, |
|
"logps/chosen": -32.180824279785156, |
|
"logps/rejected": -64.28355407714844, |
|
"loss": 0.4017, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0029981136322021, |
|
"rewards/margins": 1.6082184314727783, |
|
"rewards/rejected": -2.6112160682678223, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.313397058378386e-08, |
|
"logits/chosen": -1.5557955503463745, |
|
"logits/rejected": -1.6191835403442383, |
|
"logps/chosen": -44.55752182006836, |
|
"logps/rejected": -57.750877380371094, |
|
"loss": 0.4101, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.429595708847046, |
|
"rewards/margins": 1.3189971446990967, |
|
"rewards/rejected": -2.7485926151275635, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.240526069629264e-08, |
|
"logits/chosen": -1.6459201574325562, |
|
"logits/rejected": -1.6954996585845947, |
|
"logps/chosen": -30.61590576171875, |
|
"logps/rejected": -54.773284912109375, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.0733121633529663, |
|
"rewards/margins": 1.3275502920150757, |
|
"rewards/rejected": -2.400862455368042, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.1679803846010403e-08, |
|
"logits/chosen": -1.6004453897476196, |
|
"logits/rejected": -1.636222243309021, |
|
"logps/chosen": -36.51213073730469, |
|
"logps/rejected": -56.715606689453125, |
|
"loss": 0.4328, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2924304008483887, |
|
"rewards/margins": 1.5186452865600586, |
|
"rewards/rejected": -2.811075448989868, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.0957649616949215e-08, |
|
"logits/chosen": -1.6312261819839478, |
|
"logits/rejected": -1.640782117843628, |
|
"logps/chosen": -37.51857376098633, |
|
"logps/rejected": -70.64649200439453, |
|
"loss": 0.4218, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0793393850326538, |
|
"rewards/margins": 1.7332353591918945, |
|
"rewards/rejected": -2.812574863433838, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.0238847367391314e-08, |
|
"logits/chosen": -1.5880235433578491, |
|
"logits/rejected": -1.6467769145965576, |
|
"logps/chosen": -32.823516845703125, |
|
"logps/rejected": -61.8253288269043, |
|
"loss": 0.4022, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0444438457489014, |
|
"rewards/margins": 1.6926997900009155, |
|
"rewards/rejected": -2.7371437549591064, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.952344622651565e-08, |
|
"logits/chosen": -1.6432594060897827, |
|
"logits/rejected": -1.6623003482818604, |
|
"logps/chosen": -32.66761779785156, |
|
"logps/rejected": -61.32550048828125, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.2330167293548584, |
|
"rewards/margins": 1.3832911252975464, |
|
"rewards/rejected": -2.6163077354431152, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.8811495091039923e-08, |
|
"logits/chosen": -1.6371605396270752, |
|
"logits/rejected": -1.6313444375991821, |
|
"logps/chosen": -36.848670959472656, |
|
"logps/rejected": -58.194969177246094, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.425100564956665, |
|
"rewards/margins": 1.1800484657287598, |
|
"rewards/rejected": -2.6051487922668457, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.810304262187851e-08, |
|
"logits/chosen": -1.5991828441619873, |
|
"logits/rejected": -1.6860853433609009, |
|
"logps/chosen": -41.5286865234375, |
|
"logps/rejected": -60.21526336669922, |
|
"loss": 0.402, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.278834581375122, |
|
"rewards/margins": 1.2690808773040771, |
|
"rewards/rejected": -2.547915458679199, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.739813724081661e-08, |
|
"logits/chosen": -1.6154054403305054, |
|
"logits/rejected": -1.6397669315338135, |
|
"logps/chosen": -40.9693489074707, |
|
"logps/rejected": -64.9931640625, |
|
"loss": 0.3921, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.4517185688018799, |
|
"rewards/margins": 1.4309002161026, |
|
"rewards/rejected": -2.8826186656951904, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.6696827127200644e-08, |
|
"logits/chosen": -1.7005449533462524, |
|
"logits/rejected": -1.7360703945159912, |
|
"logps/chosen": -36.8565559387207, |
|
"logps/rejected": -56.22419357299805, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3760679960250854, |
|
"rewards/margins": 1.0964031219482422, |
|
"rewards/rejected": -2.472470998764038, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.599916021464531e-08, |
|
"logits/chosen": -1.5377979278564453, |
|
"logits/rejected": -1.6516033411026, |
|
"logps/chosen": -39.64506149291992, |
|
"logps/rejected": -54.380836486816406, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.8013836145401, |
|
"rewards/margins": 1.0361871719360352, |
|
"rewards/rejected": -2.8375706672668457, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.530518418775733e-08, |
|
"logits/chosen": -1.5973961353302002, |
|
"logits/rejected": -1.6904951333999634, |
|
"logps/chosen": -30.8753604888916, |
|
"logps/rejected": -58.85967254638672, |
|
"loss": 0.435, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0102612972259521, |
|
"rewards/margins": 1.7187198400497437, |
|
"rewards/rejected": -2.7289814949035645, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_logits/chosen": -1.7733038663864136, |
|
"eval_logits/rejected": -1.8373956680297852, |
|
"eval_logps/chosen": -40.727088928222656, |
|
"eval_logps/rejected": -61.374794006347656, |
|
"eval_loss": 0.4264953136444092, |
|
"eval_rewards/accuracies": 0.780351459980011, |
|
"eval_rewards/chosen": -1.3451405763626099, |
|
"eval_rewards/margins": 1.403651475906372, |
|
"eval_rewards/rejected": -2.7487919330596924, |
|
"eval_runtime": 676.5091, |
|
"eval_samples_per_second": 29.564, |
|
"eval_steps_per_second": 0.463, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4614946478876306e-08, |
|
"logits/chosen": -1.5811793804168701, |
|
"logits/rejected": -1.6037524938583374, |
|
"logps/chosen": -41.72180938720703, |
|
"logps/rejected": -56.50455856323242, |
|
"loss": 0.4415, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3052692413330078, |
|
"rewards/margins": 1.4770625829696655, |
|
"rewards/rejected": -2.782331943511963, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.392849426483274e-08, |
|
"logits/chosen": -1.5790523290634155, |
|
"logits/rejected": -1.6081444025039673, |
|
"logps/chosen": -36.94837188720703, |
|
"logps/rejected": -61.35870361328125, |
|
"loss": 0.432, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3901188373565674, |
|
"rewards/margins": 1.331687092781067, |
|
"rewards/rejected": -2.721806049346924, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.324587446372364e-08, |
|
"logits/chosen": -1.6028099060058594, |
|
"logits/rejected": -1.6534366607666016, |
|
"logps/chosen": -39.56840515136719, |
|
"logps/rejected": -59.75825119018555, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5059459209442139, |
|
"rewards/margins": 1.4793611764907837, |
|
"rewards/rejected": -2.985307216644287, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.256713373170564e-08, |
|
"logits/chosen": -1.5526655912399292, |
|
"logits/rejected": -1.6214317083358765, |
|
"logps/chosen": -38.09294509887695, |
|
"logps/rejected": -58.60236358642578, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.434067726135254, |
|
"rewards/margins": 1.3282562494277954, |
|
"rewards/rejected": -2.762324094772339, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.1892318459806175e-08, |
|
"logits/chosen": -1.5805480480194092, |
|
"logits/rejected": -1.6314042806625366, |
|
"logps/chosen": -37.16118240356445, |
|
"logps/rejected": -51.327178955078125, |
|
"loss": 0.424, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4472792148590088, |
|
"rewards/margins": 1.1773505210876465, |
|
"rewards/rejected": -2.6246297359466553, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.1221474770752695e-08, |
|
"logits/chosen": -1.5926434993743896, |
|
"logits/rejected": -1.6545213460922241, |
|
"logps/chosen": -34.86847686767578, |
|
"logps/rejected": -57.7491455078125, |
|
"loss": 0.4286, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.1985056400299072, |
|
"rewards/margins": 1.478855848312378, |
|
"rewards/rejected": -2.6773617267608643, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.055464851582021e-08, |
|
"logits/chosen": -1.6083743572235107, |
|
"logits/rejected": -1.718679428100586, |
|
"logps/chosen": -32.2127685546875, |
|
"logps/rejected": -67.01476287841797, |
|
"loss": 0.3731, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0988613367080688, |
|
"rewards/margins": 2.053067207336426, |
|
"rewards/rejected": -3.151928186416626, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.989188527169749e-08, |
|
"logits/chosen": -1.5989800691604614, |
|
"logits/rejected": -1.6427767276763916, |
|
"logps/chosen": -36.41338348388672, |
|
"logps/rejected": -56.013519287109375, |
|
"loss": 0.409, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3234190940856934, |
|
"rewards/margins": 1.381638526916504, |
|
"rewards/rejected": -2.7050576210021973, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.923323033737188e-08, |
|
"logits/chosen": -1.5973937511444092, |
|
"logits/rejected": -1.6154829263687134, |
|
"logps/chosen": -41.698753356933594, |
|
"logps/rejected": -56.54584503173828, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5426132678985596, |
|
"rewards/margins": 1.18071711063385, |
|
"rewards/rejected": -2.723330497741699, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.857872873103322e-08, |
|
"logits/chosen": -1.6224912405014038, |
|
"logits/rejected": -1.6204932928085327, |
|
"logps/chosen": -36.34685516357422, |
|
"logps/rejected": -48.54065704345703, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.276078462600708, |
|
"rewards/margins": 1.1468660831451416, |
|
"rewards/rejected": -2.4229445457458496, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.7928425186996883e-08, |
|
"logits/chosen": -1.628617525100708, |
|
"logits/rejected": -1.7156063318252563, |
|
"logps/chosen": -31.950937271118164, |
|
"logps/rejected": -52.45146560668945, |
|
"loss": 0.4136, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2364708185195923, |
|
"rewards/margins": 1.715372085571289, |
|
"rewards/rejected": -2.951842784881592, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.7282364152646295e-08, |
|
"logits/chosen": -1.6578394174575806, |
|
"logits/rejected": -1.7061634063720703, |
|
"logps/chosen": -35.03657913208008, |
|
"logps/rejected": -70.58955383300781, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.331438422203064, |
|
"rewards/margins": 1.8967783451080322, |
|
"rewards/rejected": -3.2282166481018066, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.664058978539495e-08, |
|
"logits/chosen": -1.5942935943603516, |
|
"logits/rejected": -1.6189035177230835, |
|
"logps/chosen": -42.353904724121094, |
|
"logps/rejected": -57.705413818359375, |
|
"loss": 0.4212, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5647684335708618, |
|
"rewards/margins": 1.509913682937622, |
|
"rewards/rejected": -3.0746822357177734, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.600314594966833e-08, |
|
"logits/chosen": -1.6005092859268188, |
|
"logits/rejected": -1.6767311096191406, |
|
"logps/chosen": -43.09211349487305, |
|
"logps/rejected": -60.247520446777344, |
|
"loss": 0.4038, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4567948579788208, |
|
"rewards/margins": 1.4423904418945312, |
|
"rewards/rejected": -2.8991851806640625, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.53700762139059e-08, |
|
"logits/chosen": -1.6018587350845337, |
|
"logits/rejected": -1.6889028549194336, |
|
"logps/chosen": -37.60720443725586, |
|
"logps/rejected": -58.150054931640625, |
|
"loss": 0.3888, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.441217064857483, |
|
"rewards/margins": 1.420534372329712, |
|
"rewards/rejected": -2.8617515563964844, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.474142384758313e-08, |
|
"logits/chosen": -1.6099767684936523, |
|
"logits/rejected": -1.699637770652771, |
|
"logps/chosen": -40.36447525024414, |
|
"logps/rejected": -59.49592971801758, |
|
"loss": 0.4391, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.593315839767456, |
|
"rewards/margins": 1.6204936504364014, |
|
"rewards/rejected": -3.2138094902038574, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.41172318182542e-08, |
|
"logits/chosen": -1.634606957435608, |
|
"logits/rejected": -1.627526044845581, |
|
"logps/chosen": -34.031497955322266, |
|
"logps/rejected": -55.68294143676758, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2308284044265747, |
|
"rewards/margins": 1.326682448387146, |
|
"rewards/rejected": -2.5575108528137207, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.349754278861516e-08, |
|
"logits/chosen": -1.647221326828003, |
|
"logits/rejected": -1.6850544214248657, |
|
"logps/chosen": -42.39990234375, |
|
"logps/rejected": -56.22304153442383, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4471819400787354, |
|
"rewards/margins": 1.2381715774536133, |
|
"rewards/rejected": -2.6853535175323486, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.2882399113588066e-08, |
|
"logits/chosen": -1.6549670696258545, |
|
"logits/rejected": -1.636690378189087, |
|
"logps/chosen": -35.067543029785156, |
|
"logps/rejected": -57.54313278198242, |
|
"loss": 0.3982, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2351791858673096, |
|
"rewards/margins": 1.4587191343307495, |
|
"rewards/rejected": -2.6938984394073486, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.227184283742591e-08, |
|
"logits/chosen": -1.566417932510376, |
|
"logits/rejected": -1.5962722301483154, |
|
"logps/chosen": -44.59258270263672, |
|
"logps/rejected": -54.915374755859375, |
|
"loss": 0.4202, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6373296976089478, |
|
"rewards/margins": 1.1503968238830566, |
|
"rewards/rejected": -2.787726879119873, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_logits/chosen": -1.7744992971420288, |
|
"eval_logits/rejected": -1.8385426998138428, |
|
"eval_logps/chosen": -41.52806091308594, |
|
"eval_logps/rejected": -62.49430465698242, |
|
"eval_loss": 0.42402884364128113, |
|
"eval_rewards/accuracies": 0.7823482155799866, |
|
"eval_rewards/chosen": -1.425237774848938, |
|
"eval_rewards/margins": 1.4355047941207886, |
|
"eval_rewards/rejected": -2.8607423305511475, |
|
"eval_runtime": 682.38, |
|
"eval_samples_per_second": 29.309, |
|
"eval_steps_per_second": 0.459, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.166591569083916e-08, |
|
"logits/chosen": -1.6541789770126343, |
|
"logits/rejected": -1.666421890258789, |
|
"logps/chosen": -51.190147399902344, |
|
"logps/rejected": -61.54121780395508, |
|
"loss": 0.4285, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.477967619895935, |
|
"rewards/margins": 1.5420701503753662, |
|
"rewards/rejected": -3.0200376510620117, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.106465908814342e-08, |
|
"logits/chosen": -1.566292643547058, |
|
"logits/rejected": -1.586924433708191, |
|
"logps/chosen": -40.33735275268555, |
|
"logps/rejected": -58.9432258605957, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4586553573608398, |
|
"rewards/margins": 1.309951663017273, |
|
"rewards/rejected": -2.7686069011688232, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.04681141244288e-08, |
|
"logits/chosen": -1.5833979845046997, |
|
"logits/rejected": -1.6595224142074585, |
|
"logps/chosen": -35.47198486328125, |
|
"logps/rejected": -64.98353576660156, |
|
"loss": 0.437, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.2309479713439941, |
|
"rewards/margins": 1.9191757440567017, |
|
"rewards/rejected": -3.1501235961914062, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.987632157275114e-08, |
|
"logits/chosen": -1.571750283241272, |
|
"logits/rejected": -1.6132491827011108, |
|
"logps/chosen": -44.85167694091797, |
|
"logps/rejected": -68.72865295410156, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.527980923652649, |
|
"rewards/margins": 1.5129810571670532, |
|
"rewards/rejected": -3.0409622192382812, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.928932188134525e-08, |
|
"logits/chosen": -1.495047926902771, |
|
"logits/rejected": -1.5616710186004639, |
|
"logps/chosen": -41.87842559814453, |
|
"logps/rejected": -72.66767883300781, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.471101999282837, |
|
"rewards/margins": 1.7681621313095093, |
|
"rewards/rejected": -3.2392642498016357, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8707155170860297e-08, |
|
"logits/chosen": -1.7271652221679688, |
|
"logits/rejected": -1.7250388860702515, |
|
"logps/chosen": -39.89320373535156, |
|
"logps/rejected": -60.249778747558594, |
|
"loss": 0.4112, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5881242752075195, |
|
"rewards/margins": 1.2204608917236328, |
|
"rewards/rejected": -2.8085849285125732, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8129861231617612e-08, |
|
"logits/chosen": -1.5804831981658936, |
|
"logits/rejected": -1.6398807764053345, |
|
"logps/chosen": -43.65148162841797, |
|
"logps/rejected": -62.10638427734375, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4383070468902588, |
|
"rewards/margins": 1.6000694036483765, |
|
"rewards/rejected": -3.038376569747925, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.7557479520891104e-08, |
|
"logits/chosen": -1.5704247951507568, |
|
"logits/rejected": -1.6201874017715454, |
|
"logps/chosen": -36.51436233520508, |
|
"logps/rejected": -67.26272583007812, |
|
"loss": 0.3921, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2914024591445923, |
|
"rewards/margins": 1.8169571161270142, |
|
"rewards/rejected": -3.1083598136901855, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.699004916021038e-08, |
|
"logits/chosen": -1.6186180114746094, |
|
"logits/rejected": -1.647109031677246, |
|
"logps/chosen": -40.04192352294922, |
|
"logps/rejected": -61.596595764160156, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2729463577270508, |
|
"rewards/margins": 1.463202714920044, |
|
"rewards/rejected": -2.7361490726470947, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.642760893268684e-08, |
|
"logits/chosen": -1.6283318996429443, |
|
"logits/rejected": -1.6734449863433838, |
|
"logps/chosen": -41.37560272216797, |
|
"logps/rejected": -61.95280838012695, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6277186870574951, |
|
"rewards/margins": 1.0712679624557495, |
|
"rewards/rejected": -2.698986768722534, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.5870197280362915e-08, |
|
"logits/chosen": -1.5385441780090332, |
|
"logits/rejected": -1.5929187536239624, |
|
"logps/chosen": -36.092308044433594, |
|
"logps/rejected": -59.61371994018555, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4656503200531006, |
|
"rewards/margins": 1.491877555847168, |
|
"rewards/rejected": -2.9575278759002686, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.5317852301584643e-08, |
|
"logits/chosen": -1.7016273736953735, |
|
"logits/rejected": -1.7216222286224365, |
|
"logps/chosen": -42.860923767089844, |
|
"logps/rejected": -68.04586791992188, |
|
"loss": 0.4334, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8822638988494873, |
|
"rewards/margins": 1.0386995077133179, |
|
"rewards/rejected": -2.9209635257720947, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.477061174839755e-08, |
|
"logits/chosen": -1.667069435119629, |
|
"logits/rejected": -1.686384916305542, |
|
"logps/chosen": -35.156497955322266, |
|
"logps/rejected": -59.22174835205078, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3924586772918701, |
|
"rewards/margins": 1.4962780475616455, |
|
"rewards/rejected": -2.8887367248535156, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.4228513023966547e-08, |
|
"logits/chosen": -1.5280156135559082, |
|
"logits/rejected": -1.576804757118225, |
|
"logps/chosen": -38.35932159423828, |
|
"logps/rejected": -57.656890869140625, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3916337490081787, |
|
"rewards/margins": 1.4954885244369507, |
|
"rewards/rejected": -2.887122392654419, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.3691593180019364e-08, |
|
"logits/chosen": -1.6398168802261353, |
|
"logits/rejected": -1.6756404638290405, |
|
"logps/chosen": -37.17683410644531, |
|
"logps/rejected": -66.59625244140625, |
|
"loss": 0.4156, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4407873153686523, |
|
"rewards/margins": 1.6703660488128662, |
|
"rewards/rejected": -3.1111538410186768, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.315988891431412e-08, |
|
"logits/chosen": -1.5830076932907104, |
|
"logits/rejected": -1.6350247859954834, |
|
"logps/chosen": -39.05982208251953, |
|
"logps/rejected": -58.4395751953125, |
|
"loss": 0.4463, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5787169933319092, |
|
"rewards/margins": 1.208711862564087, |
|
"rewards/rejected": -2.787428617477417, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.263343656813107e-08, |
|
"logits/chosen": -1.6600052118301392, |
|
"logits/rejected": -1.723974585533142, |
|
"logps/chosen": -38.07198715209961, |
|
"logps/rejected": -54.6322135925293, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5236351490020752, |
|
"rewards/margins": 1.2169435024261475, |
|
"rewards/rejected": -2.7405786514282227, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.2112272123788767e-08, |
|
"logits/chosen": -1.5720638036727905, |
|
"logits/rejected": -1.6471821069717407, |
|
"logps/chosen": -33.8635139465332, |
|
"logps/rejected": -59.320289611816406, |
|
"loss": 0.4516, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3420783281326294, |
|
"rewards/margins": 1.5687716007232666, |
|
"rewards/rejected": -2.9108500480651855, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.1596431202184705e-08, |
|
"logits/chosen": -1.6860291957855225, |
|
"logits/rejected": -1.775101900100708, |
|
"logps/chosen": -41.8366584777832, |
|
"logps/rejected": -57.60499954223633, |
|
"loss": 0.3887, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.6872646808624268, |
|
"rewards/margins": 1.5663306713104248, |
|
"rewards/rejected": -3.2535953521728516, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.108594906036065e-08, |
|
"logits/chosen": -1.643472671508789, |
|
"logits/rejected": -1.6051247119903564, |
|
"logps/chosen": -34.84891891479492, |
|
"logps/rejected": -64.89028930664062, |
|
"loss": 0.4156, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.273041009902954, |
|
"rewards/margins": 1.3676272630691528, |
|
"rewards/rejected": -2.6406683921813965, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -1.7730110883712769, |
|
"eval_logits/rejected": -1.8370084762573242, |
|
"eval_logps/chosen": -41.216156005859375, |
|
"eval_logps/rejected": -62.23305892944336, |
|
"eval_loss": 0.4231519401073456, |
|
"eval_rewards/accuracies": 0.784744381904602, |
|
"eval_rewards/chosen": -1.3940469026565552, |
|
"eval_rewards/margins": 1.4405717849731445, |
|
"eval_rewards/rejected": -2.83461856842041, |
|
"eval_runtime": 682.593, |
|
"eval_samples_per_second": 29.3, |
|
"eval_steps_per_second": 0.459, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0580860589092895e-08, |
|
"logits/chosen": -1.6066029071807861, |
|
"logits/rejected": -1.6168630123138428, |
|
"logps/chosen": -41.878971099853516, |
|
"logps/rejected": -57.808631896972656, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3217754364013672, |
|
"rewards/margins": 1.4163072109222412, |
|
"rewards/rejected": -2.7380826473236084, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.008120031050753e-08, |
|
"logits/chosen": -1.6395975351333618, |
|
"logits/rejected": -1.647607445716858, |
|
"logps/chosen": -32.06978225708008, |
|
"logps/rejected": -61.098243713378906, |
|
"loss": 0.4582, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.065802812576294, |
|
"rewards/margins": 1.5604586601257324, |
|
"rewards/rejected": -2.6262614727020264, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9587002375720862e-08, |
|
"logits/chosen": -1.6124694347381592, |
|
"logits/rejected": -1.6230475902557373, |
|
"logps/chosen": -37.16108703613281, |
|
"logps/rejected": -65.89389038085938, |
|
"loss": 0.4233, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3276442289352417, |
|
"rewards/margins": 1.7044506072998047, |
|
"rewards/rejected": -3.032094717025757, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9098300562505266e-08, |
|
"logits/chosen": -1.4507148265838623, |
|
"logits/rejected": -1.503017783164978, |
|
"logps/chosen": -42.85657501220703, |
|
"logps/rejected": -58.918609619140625, |
|
"loss": 0.4174, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3876417875289917, |
|
"rewards/margins": 1.4152082204818726, |
|
"rewards/rejected": -2.8028502464294434, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8615128272980507e-08, |
|
"logits/chosen": -1.6832008361816406, |
|
"logits/rejected": -1.7357594966888428, |
|
"logps/chosen": -39.325965881347656, |
|
"logps/rejected": -56.73716354370117, |
|
"loss": 0.4169, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.447385549545288, |
|
"rewards/margins": 1.2441480159759521, |
|
"rewards/rejected": -2.6915335655212402, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8137518531330763e-08, |
|
"logits/chosen": -1.6094249486923218, |
|
"logits/rejected": -1.6434955596923828, |
|
"logps/chosen": -48.951080322265625, |
|
"logps/rejected": -68.89022064208984, |
|
"loss": 0.396, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.743072748184204, |
|
"rewards/margins": 1.4508569240570068, |
|
"rewards/rejected": -3.193929672241211, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7665503981547425e-08, |
|
"logits/chosen": -1.5964616537094116, |
|
"logits/rejected": -1.6168407201766968, |
|
"logps/chosen": -34.803096771240234, |
|
"logps/rejected": -55.64680862426758, |
|
"loss": 0.3922, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1366093158721924, |
|
"rewards/margins": 1.37692129611969, |
|
"rewards/rejected": -2.513530731201172, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7199116885197997e-08, |
|
"logits/chosen": -1.628090500831604, |
|
"logits/rejected": -1.6611076593399048, |
|
"logps/chosen": -37.68436813354492, |
|
"logps/rejected": -69.68168640136719, |
|
"loss": 0.4042, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5950982570648193, |
|
"rewards/margins": 1.762877106666565, |
|
"rewards/rejected": -3.3579750061035156, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.6738389119220965e-08, |
|
"logits/chosen": -1.660617470741272, |
|
"logits/rejected": -1.707891821861267, |
|
"logps/chosen": -42.92943572998047, |
|
"logps/rejected": -61.7951774597168, |
|
"loss": 0.4284, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.3408089876174927, |
|
"rewards/margins": 1.690129280090332, |
|
"rewards/rejected": -3.0309383869171143, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.6283352173747144e-08, |
|
"logits/chosen": -1.6138479709625244, |
|
"logits/rejected": -1.6750342845916748, |
|
"logps/chosen": -46.45161437988281, |
|
"logps/rejected": -58.84626007080078, |
|
"loss": 0.4116, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4797322750091553, |
|
"rewards/margins": 1.2433393001556396, |
|
"rewards/rejected": -2.723071575164795, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.5834037149947288e-08, |
|
"logits/chosen": -1.6072242259979248, |
|
"logits/rejected": -1.6416151523590088, |
|
"logps/chosen": -38.2723388671875, |
|
"logps/rejected": -57.70778274536133, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4590721130371094, |
|
"rewards/margins": 1.4725803136825562, |
|
"rewards/rejected": -2.931652545928955, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.5390474757906446e-08, |
|
"logits/chosen": -1.5879796743392944, |
|
"logits/rejected": -1.6024706363677979, |
|
"logps/chosen": -39.99146270751953, |
|
"logps/rejected": -63.56482696533203, |
|
"loss": 0.3969, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3184869289398193, |
|
"rewards/margins": 1.4366834163665771, |
|
"rewards/rejected": -2.7551703453063965, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.495269531452491e-08, |
|
"logits/chosen": -1.54337477684021, |
|
"logits/rejected": -1.6411644220352173, |
|
"logps/chosen": -41.12114334106445, |
|
"logps/rejected": -64.66667175292969, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4270210266113281, |
|
"rewards/margins": 1.6593749523162842, |
|
"rewards/rejected": -3.0863962173461914, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.4520728741446087e-08, |
|
"logits/chosen": -1.515159010887146, |
|
"logits/rejected": -1.5698691606521606, |
|
"logps/chosen": -36.55290222167969, |
|
"logps/rejected": -53.955406188964844, |
|
"loss": 0.4138, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.2876484394073486, |
|
"rewards/margins": 1.5186991691589355, |
|
"rewards/rejected": -2.806347608566284, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.409460456301147e-08, |
|
"logits/chosen": -1.5839471817016602, |
|
"logits/rejected": -1.6540180444717407, |
|
"logps/chosen": -40.58269500732422, |
|
"logps/rejected": -59.73209762573242, |
|
"loss": 0.3761, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4282195568084717, |
|
"rewards/margins": 1.590958595275879, |
|
"rewards/rejected": -3.0191779136657715, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.367435190424261e-08, |
|
"logits/chosen": -1.6678202152252197, |
|
"logits/rejected": -1.6910464763641357, |
|
"logps/chosen": -36.54034423828125, |
|
"logps/rejected": -64.41203308105469, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.320227026939392, |
|
"rewards/margins": 1.7249934673309326, |
|
"rewards/rejected": -3.0452206134796143, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3259999488850471e-08, |
|
"logits/chosen": -1.6234756708145142, |
|
"logits/rejected": -1.6420822143554688, |
|
"logps/chosen": -42.137020111083984, |
|
"logps/rejected": -67.44734954833984, |
|
"loss": 0.4129, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.601574182510376, |
|
"rewards/margins": 1.8647511005401611, |
|
"rewards/rejected": -3.466325044631958, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.285157563727226e-08, |
|
"logits/chosen": -1.604988694190979, |
|
"logits/rejected": -1.6851269006729126, |
|
"logps/chosen": -33.69852828979492, |
|
"logps/rejected": -53.167816162109375, |
|
"loss": 0.4284, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3362572193145752, |
|
"rewards/margins": 1.3889684677124023, |
|
"rewards/rejected": -2.7252256870269775, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.244910826473572e-08, |
|
"logits/chosen": -1.6953544616699219, |
|
"logits/rejected": -1.7079284191131592, |
|
"logps/chosen": -42.12826156616211, |
|
"logps/rejected": -69.13725280761719, |
|
"loss": 0.4081, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.5907026529312134, |
|
"rewards/margins": 1.5819746255874634, |
|
"rewards/rejected": -3.1726772785186768, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.2052624879351104e-08, |
|
"logits/chosen": -1.5870174169540405, |
|
"logits/rejected": -1.6522458791732788, |
|
"logps/chosen": -43.6306266784668, |
|
"logps/rejected": -68.43583679199219, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6367082595825195, |
|
"rewards/margins": 1.6760241985321045, |
|
"rewards/rejected": -3.312732696533203, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_logits/chosen": -1.77384352684021, |
|
"eval_logits/rejected": -1.83782160282135, |
|
"eval_logps/chosen": -41.399322509765625, |
|
"eval_logps/rejected": -62.5122184753418, |
|
"eval_loss": 0.42222440242767334, |
|
"eval_rewards/accuracies": 0.7835463285446167, |
|
"eval_rewards/chosen": -1.41236412525177, |
|
"eval_rewards/margins": 1.450170636177063, |
|
"eval_rewards/rejected": -2.862534761428833, |
|
"eval_runtime": 685.6658, |
|
"eval_samples_per_second": 29.169, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1662152580231144e-08, |
|
"logits/chosen": -1.559308409690857, |
|
"logits/rejected": -1.614293098449707, |
|
"logps/chosen": -39.211181640625, |
|
"logps/rejected": -52.80438995361328, |
|
"loss": 0.4043, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3606131076812744, |
|
"rewards/margins": 1.217910647392273, |
|
"rewards/rejected": -2.578523635864258, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1277718055638818e-08, |
|
"logits/chosen": -1.60663640499115, |
|
"logits/rejected": -1.6318187713623047, |
|
"logps/chosen": -40.94214630126953, |
|
"logps/rejected": -59.09077835083008, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6311317682266235, |
|
"rewards/margins": 1.4645156860351562, |
|
"rewards/rejected": -3.095647096633911, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.089934758116322e-08, |
|
"logits/chosen": -1.6760724782943726, |
|
"logits/rejected": -1.7328195571899414, |
|
"logps/chosen": -39.17350387573242, |
|
"logps/rejected": -69.71881866455078, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3581247329711914, |
|
"rewards/margins": 1.6831839084625244, |
|
"rewards/rejected": -3.041308879852295, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.0527067017923653e-08, |
|
"logits/chosen": -1.5283730030059814, |
|
"logits/rejected": -1.5773917436599731, |
|
"logps/chosen": -42.39814376831055, |
|
"logps/rejected": -66.12855529785156, |
|
"loss": 0.3815, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.6773475408554077, |
|
"rewards/margins": 1.4954215288162231, |
|
"rewards/rejected": -3.1727688312530518, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.0160901810802114e-08, |
|
"logits/chosen": -1.5901730060577393, |
|
"logits/rejected": -1.6413530111312866, |
|
"logps/chosen": -38.090606689453125, |
|
"logps/rejected": -59.455177307128906, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.486709713935852, |
|
"rewards/margins": 1.4121246337890625, |
|
"rewards/rejected": -2.898834228515625, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.800876986704109e-09, |
|
"logits/chosen": -1.6886398792266846, |
|
"logits/rejected": -1.7103347778320312, |
|
"logps/chosen": -35.96743392944336, |
|
"logps/rejected": -66.10350799560547, |
|
"loss": 0.4172, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.0819008350372314, |
|
"rewards/margins": 1.741093635559082, |
|
"rewards/rejected": -2.8229947090148926, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.447017152848125e-09, |
|
"logits/chosen": -1.570709228515625, |
|
"logits/rejected": -1.6483341455459595, |
|
"logps/chosen": -37.19647979736328, |
|
"logps/rejected": -62.676673889160156, |
|
"loss": 0.3874, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.3310636281967163, |
|
"rewards/margins": 1.7905714511871338, |
|
"rewards/rejected": -3.1216349601745605, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.099346495083749e-09, |
|
"logits/chosen": -1.547473669052124, |
|
"logits/rejected": -1.6466220617294312, |
|
"logps/chosen": -40.151947021484375, |
|
"logps/rejected": -69.61228942871094, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.591925024986267, |
|
"rewards/margins": 1.6890861988067627, |
|
"rewards/rejected": -3.2810111045837402, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.75788877623862e-09, |
|
"logits/chosen": -1.587485432624817, |
|
"logits/rejected": -1.6669256687164307, |
|
"logps/chosen": -40.81727981567383, |
|
"logps/rejected": -59.38299560546875, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6642837524414062, |
|
"rewards/margins": 1.4963958263397217, |
|
"rewards/rejected": -3.160679340362549, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.422667334494249e-09, |
|
"logits/chosen": -1.6186378002166748, |
|
"logits/rejected": -1.655207633972168, |
|
"logps/chosen": -36.024803161621094, |
|
"logps/rejected": -52.768226623535156, |
|
"loss": 0.3974, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1602214574813843, |
|
"rewards/margins": 1.2412422895431519, |
|
"rewards/rejected": -2.401463747024536, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.093705081790891e-09, |
|
"logits/chosen": -1.5935732126235962, |
|
"logits/rejected": -1.6794706583023071, |
|
"logps/chosen": -36.518035888671875, |
|
"logps/rejected": -56.438758850097656, |
|
"loss": 0.4131, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.366016149520874, |
|
"rewards/margins": 1.6523935794830322, |
|
"rewards/rejected": -3.0184097290039062, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.771024502261525e-09, |
|
"logits/chosen": -1.513396143913269, |
|
"logits/rejected": -1.6116784811019897, |
|
"logps/chosen": -40.99394607543945, |
|
"logps/rejected": -66.16812133789062, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.6409269571304321, |
|
"rewards/margins": 1.6607745885849, |
|
"rewards/rejected": -3.301701784133911, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.454647650695157e-09, |
|
"logits/chosen": -1.6389579772949219, |
|
"logits/rejected": -1.6486486196517944, |
|
"logps/chosen": -41.79307174682617, |
|
"logps/rejected": -59.66328811645508, |
|
"loss": 0.4002, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5924742221832275, |
|
"rewards/margins": 1.3968980312347412, |
|
"rewards/rejected": -2.9893722534179688, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.144596151029303e-09, |
|
"logits/chosen": -1.5585296154022217, |
|
"logits/rejected": -1.5989525318145752, |
|
"logps/chosen": -40.13111114501953, |
|
"logps/rejected": -57.73792266845703, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3726533651351929, |
|
"rewards/margins": 1.4509780406951904, |
|
"rewards/rejected": -2.823631763458252, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.840891194872111e-09, |
|
"logits/chosen": -1.5726600885391235, |
|
"logits/rejected": -1.6179898977279663, |
|
"logps/chosen": -36.399269104003906, |
|
"logps/rejected": -57.10075759887695, |
|
"loss": 0.3965, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4039640426635742, |
|
"rewards/margins": 1.5349563360214233, |
|
"rewards/rejected": -2.938920259475708, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.5435535400539254e-09, |
|
"logits/chosen": -1.5845414400100708, |
|
"logits/rejected": -1.6111781597137451, |
|
"logps/chosen": -36.8482666015625, |
|
"logps/rejected": -65.9016342163086, |
|
"loss": 0.4049, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.438260555267334, |
|
"rewards/margins": 1.4630625247955322, |
|
"rewards/rejected": -2.901322841644287, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.252603509208465e-09, |
|
"logits/chosen": -1.6787540912628174, |
|
"logits/rejected": -1.7322124242782593, |
|
"logps/chosen": -36.04154586791992, |
|
"logps/rejected": -56.341712951660156, |
|
"loss": 0.4299, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.421565055847168, |
|
"rewards/margins": 1.3360097408294678, |
|
"rewards/rejected": -2.7575747966766357, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.9680609883838825e-09, |
|
"logits/chosen": -1.7003612518310547, |
|
"logits/rejected": -1.7145462036132812, |
|
"logps/chosen": -39.15507507324219, |
|
"logps/rejected": -60.2194709777832, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5533109903335571, |
|
"rewards/margins": 1.3238098621368408, |
|
"rewards/rejected": -2.8771207332611084, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.689945425683473e-09, |
|
"logits/chosen": -1.5450057983398438, |
|
"logits/rejected": -1.621883749961853, |
|
"logps/chosen": -33.54936981201172, |
|
"logps/rejected": -61.19047927856445, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.2975671291351318, |
|
"rewards/margins": 1.844926118850708, |
|
"rewards/rejected": -3.142493486404419, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.418275829936536e-09, |
|
"logits/chosen": -1.646265983581543, |
|
"logits/rejected": -1.673706293106079, |
|
"logps/chosen": -35.322303771972656, |
|
"logps/rejected": -54.55794143676758, |
|
"loss": 0.4042, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.0582917928695679, |
|
"rewards/margins": 1.6028120517730713, |
|
"rewards/rejected": -2.6611039638519287, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -1.7735157012939453, |
|
"eval_logits/rejected": -1.837532639503479, |
|
"eval_logps/chosen": -41.474159240722656, |
|
"eval_logps/rejected": -62.62734603881836, |
|
"eval_loss": 0.4219669997692108, |
|
"eval_rewards/accuracies": 0.7867411971092224, |
|
"eval_rewards/chosen": -1.419848084449768, |
|
"eval_rewards/margins": 1.4541994333267212, |
|
"eval_rewards/rejected": -2.8740475177764893, |
|
"eval_runtime": 686.9405, |
|
"eval_samples_per_second": 29.115, |
|
"eval_steps_per_second": 0.456, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.15307076939906e-09, |
|
"logits/chosen": -1.6508762836456299, |
|
"logits/rejected": -1.6736085414886475, |
|
"logps/chosen": -34.982662200927734, |
|
"logps/rejected": -77.63450622558594, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3135364055633545, |
|
"rewards/margins": 1.8769088983535767, |
|
"rewards/rejected": -3.1904451847076416, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.8943483704846465e-09, |
|
"logits/chosen": -1.6302423477172852, |
|
"logits/rejected": -1.6854890584945679, |
|
"logps/chosen": -35.06315612792969, |
|
"logps/rejected": -58.57770538330078, |
|
"loss": 0.3989, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2450635433197021, |
|
"rewards/margins": 1.4307886362075806, |
|
"rewards/rejected": -2.6758522987365723, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.6421263165255855e-09, |
|
"logits/chosen": -1.6691267490386963, |
|
"logits/rejected": -1.645403265953064, |
|
"logps/chosen": -37.04888916015625, |
|
"logps/rejected": -57.073890686035156, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.4926297664642334, |
|
"rewards/margins": 1.0904326438903809, |
|
"rewards/rejected": -2.5830626487731934, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.396421846564236e-09, |
|
"logits/chosen": -1.6642389297485352, |
|
"logits/rejected": -1.6480493545532227, |
|
"logps/chosen": -38.11848449707031, |
|
"logps/rejected": -57.89258575439453, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4186296463012695, |
|
"rewards/margins": 1.2914459705352783, |
|
"rewards/rejected": -2.710075855255127, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.157251754174729e-09, |
|
"logits/chosen": -1.5499961376190186, |
|
"logits/rejected": -1.5773283243179321, |
|
"logps/chosen": -33.74531936645508, |
|
"logps/rejected": -61.049644470214844, |
|
"loss": 0.4196, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.35075044631958, |
|
"rewards/margins": 1.5633623600006104, |
|
"rewards/rejected": -2.9141130447387695, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.924632386315185e-09, |
|
"logits/chosen": -1.5876575708389282, |
|
"logits/rejected": -1.633788824081421, |
|
"logps/chosen": -38.55252456665039, |
|
"logps/rejected": -66.43755340576172, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.3177063465118408, |
|
"rewards/margins": 1.7444379329681396, |
|
"rewards/rejected": -3.0621442794799805, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.6985796422103977e-09, |
|
"logits/chosen": -1.6687253713607788, |
|
"logits/rejected": -1.7130107879638672, |
|
"logps/chosen": -38.195884704589844, |
|
"logps/rejected": -60.389747619628906, |
|
"loss": 0.402, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4002857208251953, |
|
"rewards/margins": 1.495273232460022, |
|
"rewards/rejected": -2.895559072494507, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.4791089722651433e-09, |
|
"logits/chosen": -1.565155029296875, |
|
"logits/rejected": -1.611336350440979, |
|
"logps/chosen": -37.587562561035156, |
|
"logps/rejected": -55.95196533203125, |
|
"loss": 0.3799, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.3682587146759033, |
|
"rewards/margins": 1.4426069259643555, |
|
"rewards/rejected": -2.810865640640259, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.266235377008175e-09, |
|
"logits/chosen": -1.5435701608657837, |
|
"logits/rejected": -1.643436074256897, |
|
"logps/chosen": -35.90860366821289, |
|
"logps/rejected": -62.77077102661133, |
|
"loss": 0.4023, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4303227663040161, |
|
"rewards/margins": 1.7711102962493896, |
|
"rewards/rejected": -3.2014331817626953, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0599734060669624e-09, |
|
"logits/chosen": -1.5973424911499023, |
|
"logits/rejected": -1.6078002452850342, |
|
"logps/chosen": -35.39159393310547, |
|
"logps/rejected": -62.9890022277832, |
|
"loss": 0.4134, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6187604665756226, |
|
"rewards/margins": 1.505409836769104, |
|
"rewards/rejected": -3.1241705417633057, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.860337157173243e-09, |
|
"logits/chosen": -1.7700763940811157, |
|
"logits/rejected": -1.7940946817398071, |
|
"logps/chosen": -41.7716064453125, |
|
"logps/rejected": -68.30381774902344, |
|
"loss": 0.3871, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.6702451705932617, |
|
"rewards/margins": 1.6783167123794556, |
|
"rewards/rejected": -3.3485617637634277, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6673402751994255e-09, |
|
"logits/chosen": -1.5539026260375977, |
|
"logits/rejected": -1.6509206295013428, |
|
"logps/chosen": -32.936622619628906, |
|
"logps/rejected": -52.0973014831543, |
|
"loss": 0.4306, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2196872234344482, |
|
"rewards/margins": 1.6633917093276978, |
|
"rewards/rejected": -2.8830788135528564, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.480995951226028e-09, |
|
"logits/chosen": -1.6145340204238892, |
|
"logits/rejected": -1.6516563892364502, |
|
"logps/chosen": -38.63236618041992, |
|
"logps/rejected": -56.13676071166992, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4363205432891846, |
|
"rewards/margins": 1.359151840209961, |
|
"rewards/rejected": -2.7954723834991455, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.301316921640073e-09, |
|
"logits/chosen": -1.5776631832122803, |
|
"logits/rejected": -1.6412086486816406, |
|
"logps/chosen": -39.75293731689453, |
|
"logps/rejected": -53.77742385864258, |
|
"loss": 0.4327, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.3652405738830566, |
|
"rewards/margins": 1.2693113088607788, |
|
"rewards/rejected": -2.634552001953125, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.128315467264552e-09, |
|
"logits/chosen": -1.5176748037338257, |
|
"logits/rejected": -1.5679428577423096, |
|
"logps/chosen": -33.86006546020508, |
|
"logps/rejected": -54.69047164916992, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.3462402820587158, |
|
"rewards/margins": 1.469523310661316, |
|
"rewards/rejected": -2.815763473510742, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.962003412519064e-09, |
|
"logits/chosen": -1.62423574924469, |
|
"logits/rejected": -1.6784271001815796, |
|
"logps/chosen": -35.614990234375, |
|
"logps/rejected": -64.7601089477539, |
|
"loss": 0.4106, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.2779735326766968, |
|
"rewards/margins": 1.7429111003875732, |
|
"rewards/rejected": -3.0208849906921387, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.8023921246116402e-09, |
|
"logits/chosen": -1.5703450441360474, |
|
"logits/rejected": -1.6502907276153564, |
|
"logps/chosen": -41.66686248779297, |
|
"logps/rejected": -62.78812789916992, |
|
"loss": 0.4231, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.357747197151184, |
|
"rewards/margins": 1.6436717510223389, |
|
"rewards/rejected": -3.0014190673828125, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6494925127617632e-09, |
|
"logits/chosen": -1.6496837139129639, |
|
"logits/rejected": -1.6998779773712158, |
|
"logps/chosen": -38.936195373535156, |
|
"logps/rejected": -61.56520462036133, |
|
"loss": 0.4303, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5239100456237793, |
|
"rewards/margins": 1.3634536266326904, |
|
"rewards/rejected": -2.887363910675049, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5033150274548324e-09, |
|
"logits/chosen": -1.512958288192749, |
|
"logits/rejected": -1.5619103908538818, |
|
"logps/chosen": -38.248863220214844, |
|
"logps/rejected": -60.7032356262207, |
|
"loss": 0.4007, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4521785974502563, |
|
"rewards/margins": 1.4607205390930176, |
|
"rewards/rejected": -2.9128990173339844, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.3638696597277677e-09, |
|
"logits/chosen": -1.4637887477874756, |
|
"logits/rejected": -1.4906442165374756, |
|
"logps/chosen": -35.28557586669922, |
|
"logps/rejected": -61.87548065185547, |
|
"loss": 0.4279, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.39443039894104, |
|
"rewards/margins": 1.446938157081604, |
|
"rewards/rejected": -2.8413681983947754, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_logits/chosen": -1.7738286256790161, |
|
"eval_logits/rejected": -1.8378678560256958, |
|
"eval_logps/chosen": -41.48775863647461, |
|
"eval_logps/rejected": -62.66281509399414, |
|
"eval_loss": 0.42165884375572205, |
|
"eval_rewards/accuracies": 0.7863418459892273, |
|
"eval_rewards/chosen": -1.421207308769226, |
|
"eval_rewards/margins": 1.4563862085342407, |
|
"eval_rewards/rejected": -2.877593755722046, |
|
"eval_runtime": 691.2085, |
|
"eval_samples_per_second": 28.935, |
|
"eval_steps_per_second": 0.453, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.231165940486234e-09, |
|
"logits/chosen": -1.5569957494735718, |
|
"logits/rejected": -1.6261913776397705, |
|
"logps/chosen": -43.071205139160156, |
|
"logps/rejected": -69.6430435180664, |
|
"loss": 0.3962, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4279512166976929, |
|
"rewards/margins": 1.9218699932098389, |
|
"rewards/rejected": -3.349821090698242, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1052129398531506e-09, |
|
"logits/chosen": -1.4935729503631592, |
|
"logits/rejected": -1.5457624197006226, |
|
"logps/chosen": -35.38841247558594, |
|
"logps/rejected": -55.37394332885742, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4084322452545166, |
|
"rewards/margins": 1.3835333585739136, |
|
"rewards/rejected": -2.7919657230377197, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.86019266548821e-10, |
|
"logits/chosen": -1.5539190769195557, |
|
"logits/rejected": -1.5932285785675049, |
|
"logps/chosen": -37.63361740112305, |
|
"logps/rejected": -64.95671081542969, |
|
"loss": 0.369, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5340850353240967, |
|
"rewards/margins": 1.674046516418457, |
|
"rewards/rejected": -3.2081313133239746, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.735930673024805e-10, |
|
"logits/chosen": -1.5568705797195435, |
|
"logits/rejected": -1.5749341249465942, |
|
"logps/chosen": -42.25778579711914, |
|
"logps/rejected": -62.059730529785156, |
|
"loss": 0.4085, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.544942855834961, |
|
"rewards/margins": 1.6132011413574219, |
|
"rewards/rejected": -3.1581437587738037, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.679420262954983e-10, |
|
"logits/chosen": -1.615049123764038, |
|
"logits/rejected": -1.6281379461288452, |
|
"logps/chosen": -40.41718673706055, |
|
"logps/rejected": -66.48967742919922, |
|
"loss": 0.3765, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5013377666473389, |
|
"rewards/margins": 1.585729718208313, |
|
"rewards/rejected": -3.0870673656463623, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.690733646361856e-10, |
|
"logits/chosen": -1.5112344026565552, |
|
"logits/rejected": -1.5903434753417969, |
|
"logps/chosen": -43.65718078613281, |
|
"logps/rejected": -57.16192626953125, |
|
"loss": 0.3986, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.4910286664962769, |
|
"rewards/margins": 1.6062946319580078, |
|
"rewards/rejected": -3.097323179244995, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.769938398662355e-10, |
|
"logits/chosen": -1.602304458618164, |
|
"logits/rejected": -1.6733729839324951, |
|
"logps/chosen": -36.098472595214844, |
|
"logps/rejected": -61.344871520996094, |
|
"loss": 0.4255, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.2141350507736206, |
|
"rewards/margins": 1.717765212059021, |
|
"rewards/rejected": -2.9318997859954834, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.917097454988583e-10, |
|
"logits/chosen": -1.621604323387146, |
|
"logits/rejected": -1.5961339473724365, |
|
"logps/chosen": -38.64922332763672, |
|
"logps/rejected": -50.25775909423828, |
|
"loss": 0.4178, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6571447849273682, |
|
"rewards/margins": 0.9274013638496399, |
|
"rewards/rejected": -2.5845460891723633, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.132269105886155e-10, |
|
"logits/chosen": -1.6364376544952393, |
|
"logits/rejected": -1.6746025085449219, |
|
"logps/chosen": -38.60770797729492, |
|
"logps/rejected": -63.7396125793457, |
|
"loss": 0.3967, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5758907794952393, |
|
"rewards/margins": 1.5158182382583618, |
|
"rewards/rejected": -3.0917086601257324, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.4155069933301526e-10, |
|
"logits/chosen": -1.5119609832763672, |
|
"logits/rejected": -1.5766661167144775, |
|
"logps/chosen": -35.88676071166992, |
|
"logps/rejected": -63.843101501464844, |
|
"loss": 0.3933, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.2250229120254517, |
|
"rewards/margins": 1.9286922216415405, |
|
"rewards/rejected": -3.153715133666992, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.7668601070588436e-10, |
|
"logits/chosen": -1.5765451192855835, |
|
"logits/rejected": -1.5987513065338135, |
|
"logps/chosen": -36.32225799560547, |
|
"logps/rejected": -54.76746368408203, |
|
"loss": 0.3818, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4643820524215698, |
|
"rewards/margins": 1.3365025520324707, |
|
"rewards/rejected": -2.80088472366333, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.186372781225465e-10, |
|
"logits/chosen": -1.6033214330673218, |
|
"logits/rejected": -1.6094167232513428, |
|
"logps/chosen": -37.742759704589844, |
|
"logps/rejected": -62.64867401123047, |
|
"loss": 0.4155, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5261662006378174, |
|
"rewards/margins": 1.1791956424713135, |
|
"rewards/rejected": -2.7053616046905518, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6740846913674279e-10, |
|
"logits/chosen": -1.612987756729126, |
|
"logits/rejected": -1.6750417947769165, |
|
"logps/chosen": -37.47745895385742, |
|
"logps/rejected": -54.88645553588867, |
|
"loss": 0.3985, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3979930877685547, |
|
"rewards/margins": 1.492209553718567, |
|
"rewards/rejected": -2.890202760696411, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.2300308516952628e-10, |
|
"logits/chosen": -1.6041619777679443, |
|
"logits/rejected": -1.6311432123184204, |
|
"logps/chosen": -38.45185852050781, |
|
"logps/rejected": -69.1260757446289, |
|
"loss": 0.4106, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6295732259750366, |
|
"rewards/margins": 1.7004085779190063, |
|
"rewards/rejected": -3.329982042312622, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.542416126989804e-11, |
|
"logits/chosen": -1.5118248462677002, |
|
"logits/rejected": -1.565796971321106, |
|
"logps/chosen": -33.17974090576172, |
|
"logps/rejected": -60.13874435424805, |
|
"loss": 0.4105, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.143090009689331, |
|
"rewards/margins": 1.6752750873565674, |
|
"rewards/rejected": -2.8183650970458984, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.46742659073951e-11, |
|
"logits/chosen": -1.5557069778442383, |
|
"logits/rejected": -1.6500135660171509, |
|
"logps/chosen": -31.341394424438477, |
|
"logps/rejected": -57.10686492919922, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.228372573852539, |
|
"rewards/margins": 1.6340805292129517, |
|
"rewards/rejected": -2.862452983856201, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.0755500796531e-11, |
|
"logits/chosen": -1.477113962173462, |
|
"logits/rejected": -1.5181306600570679, |
|
"logps/chosen": -39.756988525390625, |
|
"logps/rejected": -61.148597717285156, |
|
"loss": 0.406, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2764074802398682, |
|
"rewards/margins": 1.4668867588043213, |
|
"rewards/rejected": -2.7432944774627686, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.3669500753099584e-11, |
|
"logits/chosen": -1.6041514873504639, |
|
"logits/rejected": -1.6731233596801758, |
|
"logps/chosen": -37.40638732910156, |
|
"logps/rejected": -59.667518615722656, |
|
"loss": 0.4275, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3378410339355469, |
|
"rewards/margins": 1.7126833200454712, |
|
"rewards/rejected": -3.0505242347717285, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.417433582542095e-12, |
|
"logits/chosen": -1.5742517709732056, |
|
"logits/rejected": -1.5962722301483154, |
|
"logps/chosen": -32.036895751953125, |
|
"logps/rejected": -64.07173919677734, |
|
"loss": 0.4186, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2192351818084717, |
|
"rewards/margins": 1.7447025775909424, |
|
"rewards/rejected": -2.963937520980835, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.5938119888305664, |
|
"logits/rejected": -1.6400257349014282, |
|
"logps/chosen": -37.012535095214844, |
|
"logps/rejected": -61.68400955200195, |
|
"loss": 0.4215, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.2453371286392212, |
|
"rewards/margins": 1.8222535848617554, |
|
"rewards/rejected": -3.0675911903381348, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_logits/chosen": -1.7737666368484497, |
|
"eval_logits/rejected": -1.837766170501709, |
|
"eval_logps/chosen": -41.47916030883789, |
|
"eval_logps/rejected": -62.65677261352539, |
|
"eval_loss": 0.42165058851242065, |
|
"eval_rewards/accuracies": 0.7855431437492371, |
|
"eval_rewards/chosen": -1.4203476905822754, |
|
"eval_rewards/margins": 1.4566426277160645, |
|
"eval_rewards/rejected": -2.8769900798797607, |
|
"eval_runtime": 689.4497, |
|
"eval_samples_per_second": 29.009, |
|
"eval_steps_per_second": 0.454, |
|
"step": 2000 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|