|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9978142076502732, |
|
"eval_steps": 400, |
|
"global_step": 914, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01092896174863388, |
|
"grad_norm": 47.87060782291424, |
|
"learning_rate": 5.434782608695652e-08, |
|
"logits/chosen": -1.0122432708740234, |
|
"logits/rejected": -1.0073297023773193, |
|
"logps/chosen": -0.28066128492355347, |
|
"logps/rejected": -0.2858629524707794, |
|
"loss": 3.1518, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.806612968444824, |
|
"rewards/margins": 0.05201658606529236, |
|
"rewards/rejected": -2.8586294651031494, |
|
"semantic_entropy": 0.7517332434654236, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02185792349726776, |
|
"grad_norm": 63.59519845931534, |
|
"learning_rate": 1.0869565217391303e-07, |
|
"logits/chosen": -1.0451396703720093, |
|
"logits/rejected": -0.9949606657028198, |
|
"logps/chosen": -0.25711697340011597, |
|
"logps/rejected": -0.27150270342826843, |
|
"loss": 3.1207, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.57116961479187, |
|
"rewards/margins": 0.14385755360126495, |
|
"rewards/rejected": -2.715027332305908, |
|
"semantic_entropy": 0.7098506689071655, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03278688524590164, |
|
"grad_norm": 52.932404991436066, |
|
"learning_rate": 1.6304347826086955e-07, |
|
"logits/chosen": -1.0101398229599, |
|
"logits/rejected": -0.9632788896560669, |
|
"logps/chosen": -0.2672443389892578, |
|
"logps/rejected": -0.2731854319572449, |
|
"loss": 3.1124, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.6724436283111572, |
|
"rewards/margins": 0.05941082164645195, |
|
"rewards/rejected": -2.731854200363159, |
|
"semantic_entropy": 0.7272862195968628, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04371584699453552, |
|
"grad_norm": 68.70297338794734, |
|
"learning_rate": 2.1739130434782607e-07, |
|
"logits/chosen": -0.946621298789978, |
|
"logits/rejected": -0.8962594270706177, |
|
"logps/chosen": -0.2722616195678711, |
|
"logps/rejected": -0.2844754159450531, |
|
"loss": 3.1543, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.722616195678711, |
|
"rewards/margins": 0.12213809788227081, |
|
"rewards/rejected": -2.844754219055176, |
|
"semantic_entropy": 0.7445966601371765, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0546448087431694, |
|
"grad_norm": 34.23797136353184, |
|
"learning_rate": 2.717391304347826e-07, |
|
"logits/chosen": -0.9447389841079712, |
|
"logits/rejected": -0.8695358037948608, |
|
"logps/chosen": -0.27488625049591064, |
|
"logps/rejected": -0.29340118169784546, |
|
"loss": 3.1248, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.7488627433776855, |
|
"rewards/margins": 0.1851491630077362, |
|
"rewards/rejected": -2.934011936187744, |
|
"semantic_entropy": 0.753722071647644, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06557377049180328, |
|
"grad_norm": 56.95442636508264, |
|
"learning_rate": 3.260869565217391e-07, |
|
"logits/chosen": -1.0504213571548462, |
|
"logits/rejected": -0.9853544235229492, |
|
"logps/chosen": -0.26506370306015015, |
|
"logps/rejected": -0.2821282744407654, |
|
"loss": 3.1282, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.650637149810791, |
|
"rewards/margins": 0.1706458032131195, |
|
"rewards/rejected": -2.8212831020355225, |
|
"semantic_entropy": 0.7199792861938477, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07650273224043716, |
|
"grad_norm": 54.514089612724746, |
|
"learning_rate": 3.8043478260869567e-07, |
|
"logits/chosen": -1.0058822631835938, |
|
"logits/rejected": -0.9390825033187866, |
|
"logps/chosen": -0.2544824182987213, |
|
"logps/rejected": -0.2758719325065613, |
|
"loss": 3.1, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.5448241233825684, |
|
"rewards/margins": 0.2138955146074295, |
|
"rewards/rejected": -2.7587194442749023, |
|
"semantic_entropy": 0.714081346988678, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08743169398907104, |
|
"grad_norm": 61.13897060157166, |
|
"learning_rate": 4.3478260869565214e-07, |
|
"logits/chosen": -0.9637517929077148, |
|
"logits/rejected": -0.9011168479919434, |
|
"logps/chosen": -0.28103750944137573, |
|
"logps/rejected": -0.29354166984558105, |
|
"loss": 3.1681, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.8103749752044678, |
|
"rewards/margins": 0.12504148483276367, |
|
"rewards/rejected": -2.9354166984558105, |
|
"semantic_entropy": 0.7535971999168396, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09836065573770492, |
|
"grad_norm": 29.50202425422368, |
|
"learning_rate": 4.891304347826087e-07, |
|
"logits/chosen": -1.011054515838623, |
|
"logits/rejected": -0.9284116625785828, |
|
"logps/chosen": -0.28203994035720825, |
|
"logps/rejected": -0.3046588599681854, |
|
"loss": 3.106, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.820399522781372, |
|
"rewards/margins": 0.2261890470981598, |
|
"rewards/rejected": -3.04658842086792, |
|
"semantic_entropy": 0.7553126811981201, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1092896174863388, |
|
"grad_norm": 60.818918802477036, |
|
"learning_rate": 5.434782608695652e-07, |
|
"logits/chosen": -0.9375956654548645, |
|
"logits/rejected": -0.8574072122573853, |
|
"logps/chosen": -0.2780763804912567, |
|
"logps/rejected": -0.28224700689315796, |
|
"loss": 3.1338, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -2.780763864517212, |
|
"rewards/margins": 0.04170636087656021, |
|
"rewards/rejected": -2.822470188140869, |
|
"semantic_entropy": 0.7434889078140259, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12021857923497267, |
|
"grad_norm": 34.29716426184461, |
|
"learning_rate": 5.978260869565217e-07, |
|
"logits/chosen": -0.9751367568969727, |
|
"logits/rejected": -0.8606834411621094, |
|
"logps/chosen": -0.2696499526500702, |
|
"logps/rejected": -0.29947254061698914, |
|
"loss": 3.0524, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.6964995861053467, |
|
"rewards/margins": 0.2982260584831238, |
|
"rewards/rejected": -2.9947257041931152, |
|
"semantic_entropy": 0.7428679466247559, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13114754098360656, |
|
"grad_norm": 32.36546820788893, |
|
"learning_rate": 6.521739130434782e-07, |
|
"logits/chosen": -1.0148303508758545, |
|
"logits/rejected": -0.9685667157173157, |
|
"logps/chosen": -0.25762075185775757, |
|
"logps/rejected": -0.2997520864009857, |
|
"loss": 3.0039, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.576206922531128, |
|
"rewards/margins": 0.42131391167640686, |
|
"rewards/rejected": -2.997521162033081, |
|
"semantic_entropy": 0.7362821102142334, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14207650273224043, |
|
"grad_norm": 47.86126164856308, |
|
"learning_rate": 7.065217391304348e-07, |
|
"logits/chosen": -1.002937912940979, |
|
"logits/rejected": -0.9363768696784973, |
|
"logps/chosen": -0.2962821125984192, |
|
"logps/rejected": -0.3176509141921997, |
|
"loss": 3.0992, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.9628207683563232, |
|
"rewards/margins": 0.2136881798505783, |
|
"rewards/rejected": -3.176509141921997, |
|
"semantic_entropy": 0.7823900580406189, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.15300546448087432, |
|
"grad_norm": 83.46398772579433, |
|
"learning_rate": 7.608695652173913e-07, |
|
"logits/chosen": -0.9694533348083496, |
|
"logits/rejected": -0.9480490684509277, |
|
"logps/chosen": -0.2837492823600769, |
|
"logps/rejected": -0.3052641451358795, |
|
"loss": 3.0367, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.8374929428100586, |
|
"rewards/margins": 0.2151484489440918, |
|
"rewards/rejected": -3.0526413917541504, |
|
"semantic_entropy": 0.7394664883613586, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16393442622950818, |
|
"grad_norm": 35.83270782611293, |
|
"learning_rate": 8.152173913043478e-07, |
|
"logits/chosen": -0.9647692441940308, |
|
"logits/rejected": -0.9482067227363586, |
|
"logps/chosen": -0.2907211184501648, |
|
"logps/rejected": -0.33229631185531616, |
|
"loss": 3.0658, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.9072110652923584, |
|
"rewards/margins": 0.41575226187705994, |
|
"rewards/rejected": -3.322962999343872, |
|
"semantic_entropy": 0.7694975733757019, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17486338797814208, |
|
"grad_norm": 52.413564512749005, |
|
"learning_rate": 8.695652173913043e-07, |
|
"logits/chosen": -0.9714950323104858, |
|
"logits/rejected": -0.9107065200805664, |
|
"logps/chosen": -0.2882896065711975, |
|
"logps/rejected": -0.3103812336921692, |
|
"loss": 3.0244, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.8828964233398438, |
|
"rewards/margins": 0.22091606259346008, |
|
"rewards/rejected": -3.1038122177124023, |
|
"semantic_entropy": 0.7423045039176941, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18579234972677597, |
|
"grad_norm": 57.128124235325, |
|
"learning_rate": 9.239130434782608e-07, |
|
"logits/chosen": -0.9738727807998657, |
|
"logits/rejected": -0.9262188076972961, |
|
"logps/chosen": -0.29303327202796936, |
|
"logps/rejected": -0.337748646736145, |
|
"loss": 3.0267, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.9303324222564697, |
|
"rewards/margins": 0.44715413451194763, |
|
"rewards/rejected": -3.37748646736145, |
|
"semantic_entropy": 0.7571176290512085, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19672131147540983, |
|
"grad_norm": 39.74743242931724, |
|
"learning_rate": 9.782608695652173e-07, |
|
"logits/chosen": -1.046452283859253, |
|
"logits/rejected": -0.9666553735733032, |
|
"logps/chosen": -0.31861579418182373, |
|
"logps/rejected": -0.34951895475387573, |
|
"loss": 3.0463, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.186157703399658, |
|
"rewards/margins": 0.3090316653251648, |
|
"rewards/rejected": -3.495189666748047, |
|
"semantic_entropy": 0.8055832982063293, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20765027322404372, |
|
"grad_norm": 51.89832814789265, |
|
"learning_rate": 9.999671349822886e-07, |
|
"logits/chosen": -0.9848623275756836, |
|
"logits/rejected": -0.9856392741203308, |
|
"logps/chosen": -0.31298893690109253, |
|
"logps/rejected": -0.3401663601398468, |
|
"loss": 2.9541, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.1298892498016357, |
|
"rewards/margins": 0.2717742323875427, |
|
"rewards/rejected": -3.4016640186309814, |
|
"semantic_entropy": 0.7869037389755249, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2185792349726776, |
|
"grad_norm": 69.97139505648609, |
|
"learning_rate": 9.997663088532014e-07, |
|
"logits/chosen": -0.9892705678939819, |
|
"logits/rejected": -0.943418025970459, |
|
"logps/chosen": -0.35917508602142334, |
|
"logps/rejected": -0.4198976159095764, |
|
"loss": 2.9725, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -3.5917506217956543, |
|
"rewards/margins": 0.6072250008583069, |
|
"rewards/rejected": -4.198975563049316, |
|
"semantic_entropy": 0.834593653678894, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22950819672131148, |
|
"grad_norm": 43.67519297008509, |
|
"learning_rate": 9.9938298818292e-07, |
|
"logits/chosen": -1.0403445959091187, |
|
"logits/rejected": -1.0104751586914062, |
|
"logps/chosen": -0.32551589608192444, |
|
"logps/rejected": -0.38466745615005493, |
|
"loss": 2.9376, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -3.2551589012145996, |
|
"rewards/margins": 0.5915151834487915, |
|
"rewards/rejected": -3.8466744422912598, |
|
"semantic_entropy": 0.8123003840446472, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.24043715846994534, |
|
"grad_norm": 54.4822346164963, |
|
"learning_rate": 9.98817312944725e-07, |
|
"logits/chosen": -1.0293775796890259, |
|
"logits/rejected": -1.0085766315460205, |
|
"logps/chosen": -0.34657078981399536, |
|
"logps/rejected": -0.44877204298973083, |
|
"loss": 2.9452, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -3.465707778930664, |
|
"rewards/margins": 1.0220123529434204, |
|
"rewards/rejected": -4.487720012664795, |
|
"semantic_entropy": 0.8509441614151001, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25136612021857924, |
|
"grad_norm": 53.517455700291855, |
|
"learning_rate": 9.98069489700446e-07, |
|
"logits/chosen": -1.0341802835464478, |
|
"logits/rejected": -0.9952918887138367, |
|
"logps/chosen": -0.3461839258670807, |
|
"logps/rejected": -0.4705514907836914, |
|
"loss": 2.8994, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -3.4618396759033203, |
|
"rewards/margins": 1.2436755895614624, |
|
"rewards/rejected": -4.705514907836914, |
|
"semantic_entropy": 0.8380171656608582, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.26229508196721313, |
|
"grad_norm": 40.64481855809536, |
|
"learning_rate": 9.971397915250336e-07, |
|
"logits/chosen": -1.0739099979400635, |
|
"logits/rejected": -1.0038702487945557, |
|
"logps/chosen": -0.3547818958759308, |
|
"logps/rejected": -0.4196414053440094, |
|
"loss": 2.8774, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -3.547818660736084, |
|
"rewards/margins": 0.6485950350761414, |
|
"rewards/rejected": -4.196413993835449, |
|
"semantic_entropy": 0.8623871803283691, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.273224043715847, |
|
"grad_norm": 144.95477211017723, |
|
"learning_rate": 9.960285579068417e-07, |
|
"logits/chosen": -0.9688740968704224, |
|
"logits/rejected": -0.9354850053787231, |
|
"logps/chosen": -0.383869469165802, |
|
"logps/rejected": -0.47563114762306213, |
|
"loss": 2.8716, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -3.8386943340301514, |
|
"rewards/margins": 0.9176166653633118, |
|
"rewards/rejected": -4.756311416625977, |
|
"semantic_entropy": 0.8745672106742859, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.28415300546448086, |
|
"grad_norm": 47.745102969069876, |
|
"learning_rate": 9.94736194623663e-07, |
|
"logits/chosen": -0.9936184883117676, |
|
"logits/rejected": -0.9872056841850281, |
|
"logps/chosen": -0.4027808606624603, |
|
"logps/rejected": -0.5585031509399414, |
|
"loss": 2.8889, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -4.02780818939209, |
|
"rewards/margins": 1.5572230815887451, |
|
"rewards/rejected": -5.585031032562256, |
|
"semantic_entropy": 0.8549701571464539, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29508196721311475, |
|
"grad_norm": 43.036244798527335, |
|
"learning_rate": 9.932631735945526e-07, |
|
"logits/chosen": -1.018587350845337, |
|
"logits/rejected": -0.9396653175354004, |
|
"logps/chosen": -0.3934100568294525, |
|
"logps/rejected": -0.5400375127792358, |
|
"loss": 2.8008, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -3.934100389480591, |
|
"rewards/margins": 1.4662750959396362, |
|
"rewards/rejected": -5.4003753662109375, |
|
"semantic_entropy": 0.8907697796821594, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.30601092896174864, |
|
"grad_norm": 51.334063125222045, |
|
"learning_rate": 9.916100327075037e-07, |
|
"logits/chosen": -1.0269070863723755, |
|
"logits/rejected": -0.9736196398735046, |
|
"logps/chosen": -0.43043556809425354, |
|
"logps/rejected": -0.6303533911705017, |
|
"loss": 2.5701, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -4.304355621337891, |
|
"rewards/margins": 1.9991786479949951, |
|
"rewards/rejected": -6.303534507751465, |
|
"semantic_entropy": 0.9288080930709839, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31693989071038253, |
|
"grad_norm": 69.99654341210723, |
|
"learning_rate": 9.89777375623032e-07, |
|
"logits/chosen": -0.9977472424507141, |
|
"logits/rejected": -0.9811614751815796, |
|
"logps/chosen": -0.44030895829200745, |
|
"logps/rejected": -0.5321138501167297, |
|
"loss": 2.7244, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -4.4030890464782715, |
|
"rewards/margins": 0.9180487394332886, |
|
"rewards/rejected": -5.321138381958008, |
|
"semantic_entropy": 0.932425856590271, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 51.74709430626173, |
|
"learning_rate": 9.877658715537428e-07, |
|
"logits/chosen": -1.0553128719329834, |
|
"logits/rejected": -1.0262110233306885, |
|
"logps/chosen": -0.5291231870651245, |
|
"logps/rejected": -0.7928577661514282, |
|
"loss": 2.6042, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -5.291232109069824, |
|
"rewards/margins": 2.637345552444458, |
|
"rewards/rejected": -7.928577423095703, |
|
"semantic_entropy": 0.9483098983764648, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33879781420765026, |
|
"grad_norm": 59.40984432787828, |
|
"learning_rate": 9.85576255019963e-07, |
|
"logits/chosen": -1.0320864915847778, |
|
"logits/rejected": -0.9819043278694153, |
|
"logps/chosen": -0.5477417707443237, |
|
"logps/rejected": -0.7481231093406677, |
|
"loss": 2.5957, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -5.477417945861816, |
|
"rewards/margins": 2.0038137435913086, |
|
"rewards/rejected": -7.481231689453125, |
|
"semantic_entropy": 0.9526890516281128, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.34972677595628415, |
|
"grad_norm": 63.33344115210913, |
|
"learning_rate": 9.832093255815216e-07, |
|
"logits/chosen": -1.0814168453216553, |
|
"logits/rejected": -1.0304033756256104, |
|
"logps/chosen": -0.6954716444015503, |
|
"logps/rejected": -0.8502774238586426, |
|
"loss": 2.6238, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -6.954716682434082, |
|
"rewards/margins": 1.5480577945709229, |
|
"rewards/rejected": -8.502774238586426, |
|
"semantic_entropy": 0.9549511671066284, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.36065573770491804, |
|
"grad_norm": 62.82535328280916, |
|
"learning_rate": 9.806659475457849e-07, |
|
"logits/chosen": -1.0839955806732178, |
|
"logits/rejected": -1.031585931777954, |
|
"logps/chosen": -0.7121194005012512, |
|
"logps/rejected": -0.8951581716537476, |
|
"loss": 2.5445, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -7.121194362640381, |
|
"rewards/margins": 1.8303883075714111, |
|
"rewards/rejected": -8.951581954956055, |
|
"semantic_entropy": 0.9896249771118164, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.37158469945355194, |
|
"grad_norm": 65.61173370500529, |
|
"learning_rate": 9.779470496520441e-07, |
|
"logits/chosen": -1.0843085050582886, |
|
"logits/rejected": -1.0285215377807617, |
|
"logps/chosen": -0.7273966670036316, |
|
"logps/rejected": -0.9349418878555298, |
|
"loss": 2.5832, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -7.2739667892456055, |
|
"rewards/margins": 2.0754518508911133, |
|
"rewards/rejected": -9.349418640136719, |
|
"semantic_entropy": 0.9762886762619019, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3825136612021858, |
|
"grad_norm": 48.476659698357665, |
|
"learning_rate": 9.750536247323789e-07, |
|
"logits/chosen": -1.1571153402328491, |
|
"logits/rejected": -1.131704330444336, |
|
"logps/chosen": -0.8265604972839355, |
|
"logps/rejected": -0.9824529886245728, |
|
"loss": 2.4619, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -8.265604019165039, |
|
"rewards/margins": 1.5589253902435303, |
|
"rewards/rejected": -9.824529647827148, |
|
"semantic_entropy": 0.9426374435424805, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.39344262295081966, |
|
"grad_norm": 55.305744786201686, |
|
"learning_rate": 9.719867293491144e-07, |
|
"logits/chosen": -1.1452279090881348, |
|
"logits/rejected": -1.1399190425872803, |
|
"logps/chosen": -0.8152974843978882, |
|
"logps/rejected": -1.16525137424469, |
|
"loss": 2.3679, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -8.152975082397461, |
|
"rewards/margins": 3.4995384216308594, |
|
"rewards/rejected": -11.65251350402832, |
|
"semantic_entropy": 0.9442623257637024, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.40437158469945356, |
|
"grad_norm": 50.733966507742444, |
|
"learning_rate": 9.687474834090067e-07, |
|
"logits/chosen": -1.1547253131866455, |
|
"logits/rejected": -1.1736373901367188, |
|
"logps/chosen": -0.8491543531417847, |
|
"logps/rejected": -1.1844466924667358, |
|
"loss": 2.3318, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -8.491543769836426, |
|
"rewards/margins": 3.35292387008667, |
|
"rewards/rejected": -11.844468116760254, |
|
"semantic_entropy": 0.9556644558906555, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.41530054644808745, |
|
"grad_norm": 62.277237758824675, |
|
"learning_rate": 9.653370697542987e-07, |
|
"logits/chosen": -1.162003755569458, |
|
"logits/rejected": -1.121468186378479, |
|
"logps/chosen": -0.8294251561164856, |
|
"logps/rejected": -1.1698486804962158, |
|
"loss": 2.3649, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -8.294252395629883, |
|
"rewards/margins": 3.4042346477508545, |
|
"rewards/rejected": -11.698487281799316, |
|
"semantic_entropy": 0.9534858465194702, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4262295081967213, |
|
"grad_norm": 62.09032006268862, |
|
"learning_rate": 9.617567337307935e-07, |
|
"logits/chosen": -1.1882003545761108, |
|
"logits/rejected": -1.1697113513946533, |
|
"logps/chosen": -0.9817994236946106, |
|
"logps/rejected": -1.3722710609436035, |
|
"loss": 2.4013, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -9.817992210388184, |
|
"rewards/margins": 3.904717206954956, |
|
"rewards/rejected": -13.722711563110352, |
|
"semantic_entropy": 0.9071667790412903, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4371584699453552, |
|
"grad_norm": 54.79531932098139, |
|
"learning_rate": 9.580077827331037e-07, |
|
"logits/chosen": -1.160315990447998, |
|
"logits/rejected": -1.0766620635986328, |
|
"logps/chosen": -0.8970209360122681, |
|
"logps/rejected": -1.2237987518310547, |
|
"loss": 2.3542, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -8.970209121704102, |
|
"rewards/margins": 3.267777919769287, |
|
"rewards/rejected": -12.237987518310547, |
|
"semantic_entropy": 0.9425733685493469, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.44808743169398907, |
|
"grad_norm": 45.857415331803075, |
|
"learning_rate": 9.540915857272445e-07, |
|
"logits/chosen": -1.120792269706726, |
|
"logits/rejected": -1.1374807357788086, |
|
"logps/chosen": -0.7932685017585754, |
|
"logps/rejected": -1.1045658588409424, |
|
"loss": 2.2801, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -7.932684898376465, |
|
"rewards/margins": 3.112973690032959, |
|
"rewards/rejected": -11.045658111572266, |
|
"semantic_entropy": 0.9677651524543762, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.45901639344262296, |
|
"grad_norm": 71.89691225680161, |
|
"learning_rate": 9.500095727507419e-07, |
|
"logits/chosen": -1.1540464162826538, |
|
"logits/rejected": -1.1580009460449219, |
|
"logps/chosen": -0.8536632657051086, |
|
"logps/rejected": -1.2688827514648438, |
|
"loss": 2.1643, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -8.536632537841797, |
|
"rewards/margins": 4.152195453643799, |
|
"rewards/rejected": -12.688827514648438, |
|
"semantic_entropy": 0.9133696556091309, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46994535519125685, |
|
"grad_norm": 61.886918880598415, |
|
"learning_rate": 9.457632343904402e-07, |
|
"logits/chosen": -1.1507601737976074, |
|
"logits/rejected": -1.0994901657104492, |
|
"logps/chosen": -0.891444981098175, |
|
"logps/rejected": -1.3195106983184814, |
|
"loss": 2.2496, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.914449691772461, |
|
"rewards/margins": 4.2806572914123535, |
|
"rewards/rejected": -13.195106506347656, |
|
"semantic_entropy": 0.943720817565918, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4808743169398907, |
|
"grad_norm": 48.894845818998725, |
|
"learning_rate": 9.413541212382004e-07, |
|
"logits/chosen": -1.2136586904525757, |
|
"logits/rejected": -1.1905956268310547, |
|
"logps/chosen": -0.9255884289741516, |
|
"logps/rejected": -1.2389224767684937, |
|
"loss": 2.2122, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -9.255884170532227, |
|
"rewards/margins": 3.133340358734131, |
|
"rewards/rejected": -12.389223098754883, |
|
"semantic_entropy": 0.9290882349014282, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4918032786885246, |
|
"grad_norm": 53.07969298601074, |
|
"learning_rate": 9.367838433246857e-07, |
|
"logits/chosen": -1.2239024639129639, |
|
"logits/rejected": -1.1851261854171753, |
|
"logps/chosen": -0.8761332631111145, |
|
"logps/rejected": -1.2777061462402344, |
|
"loss": 2.1765, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.761332511901855, |
|
"rewards/margins": 4.0157294273376465, |
|
"rewards/rejected": -12.777061462402344, |
|
"semantic_entropy": 0.9319503903388977, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5027322404371585, |
|
"grad_norm": 51.09299897041373, |
|
"learning_rate": 9.320540695314438e-07, |
|
"logits/chosen": -1.1558756828308105, |
|
"logits/rejected": -1.1598188877105713, |
|
"logps/chosen": -0.8811947703361511, |
|
"logps/rejected": -1.2912404537200928, |
|
"loss": 2.2098, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.811946868896484, |
|
"rewards/margins": 4.100456714630127, |
|
"rewards/rejected": -12.91240406036377, |
|
"semantic_entropy": 0.9310176968574524, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5136612021857924, |
|
"grad_norm": 63.259306143827835, |
|
"learning_rate": 9.271665269814983e-07, |
|
"logits/chosen": -1.188391923904419, |
|
"logits/rejected": -1.1512023210525513, |
|
"logps/chosen": -0.8918437957763672, |
|
"logps/rejected": -1.2489241361618042, |
|
"loss": 2.1333, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -8.918437957763672, |
|
"rewards/margins": 3.5708038806915283, |
|
"rewards/rejected": -12.489240646362305, |
|
"semantic_entropy": 0.9315102696418762, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5245901639344263, |
|
"grad_norm": 55.527350378129, |
|
"learning_rate": 9.221230004086721e-07, |
|
"logits/chosen": -1.2678356170654297, |
|
"logits/rejected": -1.2772780656814575, |
|
"logps/chosen": -0.8592067956924438, |
|
"logps/rejected": -1.3196837902069092, |
|
"loss": 2.0237, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -8.592068672180176, |
|
"rewards/margins": 4.604770660400391, |
|
"rewards/rejected": -13.19683837890625, |
|
"semantic_entropy": 0.9410519599914551, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5355191256830601, |
|
"grad_norm": 46.907821708328406, |
|
"learning_rate": 9.169253315058763e-07, |
|
"logits/chosen": -1.1692125797271729, |
|
"logits/rejected": -1.125632405281067, |
|
"logps/chosen": -0.905608057975769, |
|
"logps/rejected": -1.3867673873901367, |
|
"loss": 2.1096, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -9.056081771850586, |
|
"rewards/margins": 4.811593055725098, |
|
"rewards/rejected": -13.867673873901367, |
|
"semantic_entropy": 0.921157717704773, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.546448087431694, |
|
"grad_norm": 47.861862507896085, |
|
"learning_rate": 9.11575418252596e-07, |
|
"logits/chosen": -1.232251763343811, |
|
"logits/rejected": -1.1941629648208618, |
|
"logps/chosen": -0.8441025614738464, |
|
"logps/rejected": -1.2240302562713623, |
|
"loss": 2.1618, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -8.441025733947754, |
|
"rewards/margins": 3.799276828765869, |
|
"rewards/rejected": -12.240303039550781, |
|
"semantic_entropy": 0.9252967834472656, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5573770491803278, |
|
"grad_norm": 54.40105116628037, |
|
"learning_rate": 9.060752142218257e-07, |
|
"logits/chosen": -1.213555932044983, |
|
"logits/rejected": -1.1773382425308228, |
|
"logps/chosen": -0.8959819078445435, |
|
"logps/rejected": -1.3679741621017456, |
|
"loss": 2.0365, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -8.959817886352539, |
|
"rewards/margins": 4.71992301940918, |
|
"rewards/rejected": -13.679742813110352, |
|
"semantic_entropy": 0.9322195053100586, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5683060109289617, |
|
"grad_norm": 43.449537508765815, |
|
"learning_rate": 9.004267278667031e-07, |
|
"logits/chosen": -1.1810890436172485, |
|
"logits/rejected": -1.1702289581298828, |
|
"logps/chosen": -0.8510452508926392, |
|
"logps/rejected": -1.3418259620666504, |
|
"loss": 2.011, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -8.510452270507812, |
|
"rewards/margins": 4.907806873321533, |
|
"rewards/rejected": -13.41826057434082, |
|
"semantic_entropy": 0.9143549203872681, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5792349726775956, |
|
"grad_norm": 46.818755419193465, |
|
"learning_rate": 8.946320217871025e-07, |
|
"logits/chosen": -1.1749790906906128, |
|
"logits/rejected": -1.1358766555786133, |
|
"logps/chosen": -0.855148434638977, |
|
"logps/rejected": -1.3291784524917603, |
|
"loss": 1.9976, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -8.551485061645508, |
|
"rewards/margins": 4.740299224853516, |
|
"rewards/rejected": -13.291783332824707, |
|
"semantic_entropy": 0.9298276901245117, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5901639344262295, |
|
"grad_norm": 95.7954675499385, |
|
"learning_rate": 8.886932119764565e-07, |
|
"logits/chosen": -1.1698591709136963, |
|
"logits/rejected": -1.1438281536102295, |
|
"logps/chosen": -0.8544471859931946, |
|
"logps/rejected": -1.377416968345642, |
|
"loss": 1.9774, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -8.544472694396973, |
|
"rewards/margins": 5.229698657989502, |
|
"rewards/rejected": -13.774169921875, |
|
"semantic_entropy": 0.9152740240097046, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6010928961748634, |
|
"grad_norm": 52.4740552697882, |
|
"learning_rate": 8.826124670490802e-07, |
|
"logits/chosen": -1.140944242477417, |
|
"logits/rejected": -1.0730197429656982, |
|
"logps/chosen": -0.8467117547988892, |
|
"logps/rejected": -1.2126039266586304, |
|
"loss": 1.9796, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -8.467116355895996, |
|
"rewards/margins": 3.658921718597412, |
|
"rewards/rejected": -12.126038551330566, |
|
"semantic_entropy": 0.933331310749054, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.6120218579234973, |
|
"grad_norm": 54.704164373442616, |
|
"learning_rate": 8.763920074482809e-07, |
|
"logits/chosen": -1.102807879447937, |
|
"logits/rejected": -1.105039358139038, |
|
"logps/chosen": -0.8896454572677612, |
|
"logps/rejected": -1.4699008464813232, |
|
"loss": 1.9808, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -8.896454811096191, |
|
"rewards/margins": 5.802553176879883, |
|
"rewards/rejected": -14.699007987976074, |
|
"semantic_entropy": 0.8732292056083679, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6229508196721312, |
|
"grad_norm": 48.33002211252601, |
|
"learning_rate": 8.700341046355411e-07, |
|
"logits/chosen": -1.2859059572219849, |
|
"logits/rejected": -1.2477091550827026, |
|
"logps/chosen": -0.8521019220352173, |
|
"logps/rejected": -1.4364469051361084, |
|
"loss": 1.8954, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -8.52101993560791, |
|
"rewards/margins": 5.843448162078857, |
|
"rewards/rejected": -14.364468574523926, |
|
"semantic_entropy": 0.9044594764709473, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6338797814207651, |
|
"grad_norm": 62.5830858895554, |
|
"learning_rate": 8.635410802610723e-07, |
|
"logits/chosen": -1.2080810070037842, |
|
"logits/rejected": -1.1687798500061035, |
|
"logps/chosen": -0.8889066576957703, |
|
"logps/rejected": -1.4597949981689453, |
|
"loss": 1.9215, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -8.889066696166992, |
|
"rewards/margins": 5.708883762359619, |
|
"rewards/rejected": -14.59795093536377, |
|
"semantic_entropy": 0.903703510761261, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.644808743169399, |
|
"grad_norm": 52.105468651247094, |
|
"learning_rate": 8.569153053160428e-07, |
|
"logits/chosen": -1.1924866437911987, |
|
"logits/rejected": -1.182565689086914, |
|
"logps/chosen": -0.9297744035720825, |
|
"logps/rejected": -1.5572900772094727, |
|
"loss": 1.8847, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -9.29774284362793, |
|
"rewards/margins": 6.2751569747924805, |
|
"rewards/rejected": -15.572900772094727, |
|
"semantic_entropy": 0.8886201977729797, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 43.97404227922028, |
|
"learning_rate": 8.501591992667849e-07, |
|
"logits/chosen": -1.2417964935302734, |
|
"logits/rejected": -1.2167500257492065, |
|
"logps/chosen": -0.9788614511489868, |
|
"logps/rejected": -1.5977232456207275, |
|
"loss": 1.9048, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -9.788614273071289, |
|
"rewards/margins": 6.188617706298828, |
|
"rewards/rejected": -15.977231979370117, |
|
"semantic_entropy": 0.8578527569770813, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 62.519761231188205, |
|
"learning_rate": 8.432752291713058e-07, |
|
"logits/chosen": -1.227373719215393, |
|
"logits/rejected": -1.1630009412765503, |
|
"logps/chosen": -0.9313735961914062, |
|
"logps/rejected": -1.6220667362213135, |
|
"loss": 1.876, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -9.313735008239746, |
|
"rewards/margins": 6.906930446624756, |
|
"rewards/rejected": -16.220666885375977, |
|
"semantic_entropy": 0.8703945875167847, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6775956284153005, |
|
"grad_norm": 47.74676931324823, |
|
"learning_rate": 8.362659087784152e-07, |
|
"logits/chosen": -1.1420575380325317, |
|
"logits/rejected": -1.1442222595214844, |
|
"logps/chosen": -0.921275794506073, |
|
"logps/rejected": -1.5964065790176392, |
|
"loss": 1.9255, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -9.212759017944336, |
|
"rewards/margins": 6.751306056976318, |
|
"rewards/rejected": -15.964065551757812, |
|
"semantic_entropy": 0.8867815732955933, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6885245901639344, |
|
"grad_norm": 48.12633140725401, |
|
"learning_rate": 8.291337976098067e-07, |
|
"logits/chosen": -1.1699371337890625, |
|
"logits/rejected": -1.1596167087554932, |
|
"logps/chosen": -0.9925182461738586, |
|
"logps/rejected": -1.4757254123687744, |
|
"loss": 1.8872, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -9.925182342529297, |
|
"rewards/margins": 4.832071781158447, |
|
"rewards/rejected": -14.757253646850586, |
|
"semantic_entropy": 0.8734658360481262, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6994535519125683, |
|
"grad_norm": 47.1038569824555, |
|
"learning_rate": 8.218815000254231e-07, |
|
"logits/chosen": -1.2591969966888428, |
|
"logits/rejected": -1.1927886009216309, |
|
"logps/chosen": -0.8629493713378906, |
|
"logps/rejected": -1.4769127368927002, |
|
"loss": 1.8067, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -8.629494667053223, |
|
"rewards/margins": 6.139632225036621, |
|
"rewards/rejected": -14.769126892089844, |
|
"semantic_entropy": 0.9108262062072754, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7103825136612022, |
|
"grad_norm": 56.67465709928985, |
|
"learning_rate": 8.145116642724485e-07, |
|
"logits/chosen": -1.2181096076965332, |
|
"logits/rejected": -1.189969778060913, |
|
"logps/chosen": -0.8706620335578918, |
|
"logps/rejected": -1.4245946407318115, |
|
"loss": 1.8061, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -8.706620216369629, |
|
"rewards/margins": 5.539328098297119, |
|
"rewards/rejected": -14.245946884155273, |
|
"semantic_entropy": 0.893680214881897, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7213114754098361, |
|
"grad_norm": 43.692074758430785, |
|
"learning_rate": 8.07026981518276e-07, |
|
"logits/chosen": -1.1343576908111572, |
|
"logits/rejected": -1.0772193670272827, |
|
"logps/chosen": -0.8813779950141907, |
|
"logps/rejected": -1.7738568782806396, |
|
"loss": 1.7373, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -8.813779830932617, |
|
"rewards/margins": 8.924787521362305, |
|
"rewards/rejected": -17.73856544494629, |
|
"semantic_entropy": 0.8537489771842957, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.73224043715847, |
|
"grad_norm": 54.41817403205364, |
|
"learning_rate": 7.994301848678004e-07, |
|
"logits/chosen": -1.134152889251709, |
|
"logits/rejected": -1.063077449798584, |
|
"logps/chosen": -0.9365140199661255, |
|
"logps/rejected": -1.6991326808929443, |
|
"loss": 1.766, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -9.365139961242676, |
|
"rewards/margins": 7.626187324523926, |
|
"rewards/rejected": -16.9913272857666, |
|
"semantic_entropy": 0.8437296152114868, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7431693989071039, |
|
"grad_norm": 56.714537939738605, |
|
"learning_rate": 7.917240483654e-07, |
|
"logits/chosen": -1.1386888027191162, |
|
"logits/rejected": -1.0677882432937622, |
|
"logps/chosen": -0.9699214100837708, |
|
"logps/rejected": -1.7819700241088867, |
|
"loss": 1.8199, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -9.699213981628418, |
|
"rewards/margins": 8.12048625946045, |
|
"rewards/rejected": -17.819698333740234, |
|
"semantic_entropy": 0.8428508639335632, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7540983606557377, |
|
"grad_norm": 54.15768742157569, |
|
"learning_rate": 7.839113859819656e-07, |
|
"logits/chosen": -1.2082730531692505, |
|
"logits/rejected": -1.1757750511169434, |
|
"logps/chosen": -1.0214024782180786, |
|
"logps/rejected": -1.8994626998901367, |
|
"loss": 1.8236, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -10.214024543762207, |
|
"rewards/margins": 8.78060245513916, |
|
"rewards/rejected": -18.994626998901367, |
|
"semantic_entropy": 0.818555474281311, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7650273224043715, |
|
"grad_norm": 52.04532684140525, |
|
"learning_rate": 7.759950505873521e-07, |
|
"logits/chosen": -1.2180219888687134, |
|
"logits/rejected": -1.1834783554077148, |
|
"logps/chosen": -0.7670449018478394, |
|
"logps/rejected": -1.324202060699463, |
|
"loss": 1.7353, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -7.670449256896973, |
|
"rewards/margins": 5.571571350097656, |
|
"rewards/rejected": -13.242021560668945, |
|
"semantic_entropy": 0.9124476313591003, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7759562841530054, |
|
"grad_norm": 52.511907795888796, |
|
"learning_rate": 7.67977932908626e-07, |
|
"logits/chosen": -1.175022840499878, |
|
"logits/rejected": -1.1130549907684326, |
|
"logps/chosen": -0.8713346719741821, |
|
"logps/rejected": -1.66217839717865, |
|
"loss": 1.726, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -8.713346481323242, |
|
"rewards/margins": 7.908437252044678, |
|
"rewards/rejected": -16.621784210205078, |
|
"semantic_entropy": 0.8560686111450195, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7868852459016393, |
|
"grad_norm": 47.66801579095495, |
|
"learning_rate": 7.598629604744872e-07, |
|
"logits/chosen": -1.1504714488983154, |
|
"logits/rejected": -1.121519923210144, |
|
"logps/chosen": -1.078308343887329, |
|
"logps/rejected": -2.017784833908081, |
|
"loss": 1.687, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -10.783082008361816, |
|
"rewards/margins": 9.394767761230469, |
|
"rewards/rejected": -20.17784881591797, |
|
"semantic_entropy": 0.8011868596076965, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7978142076502732, |
|
"grad_norm": 78.73352396462461, |
|
"learning_rate": 7.516530965462539e-07, |
|
"logits/chosen": -1.2399051189422607, |
|
"logits/rejected": -1.2221591472625732, |
|
"logps/chosen": -0.869607150554657, |
|
"logps/rejected": -1.7532609701156616, |
|
"loss": 1.6969, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -8.696072578430176, |
|
"rewards/margins": 8.836538314819336, |
|
"rewards/rejected": -17.532609939575195, |
|
"semantic_entropy": 0.8715127110481262, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.8087431693989071, |
|
"grad_norm": 52.51768985735217, |
|
"learning_rate": 7.433513390357989e-07, |
|
"logits/chosen": -1.2507340908050537, |
|
"logits/rejected": -1.187475562095642, |
|
"logps/chosen": -0.9717696905136108, |
|
"logps/rejected": -2.0153520107269287, |
|
"loss": 1.6488, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -9.717697143554688, |
|
"rewards/margins": 10.435824394226074, |
|
"rewards/rejected": -20.153522491455078, |
|
"semantic_entropy": 0.8269231915473938, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.819672131147541, |
|
"grad_norm": 50.10941942498599, |
|
"learning_rate": 7.349607194108322e-07, |
|
"logits/chosen": -1.2848598957061768, |
|
"logits/rejected": -1.1889159679412842, |
|
"logps/chosen": -0.8790639638900757, |
|
"logps/rejected": -1.7771461009979248, |
|
"loss": 1.6703, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -8.790639877319336, |
|
"rewards/margins": 8.980820655822754, |
|
"rewards/rejected": -17.771459579467773, |
|
"semantic_entropy": 0.853074848651886, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8306010928961749, |
|
"grad_norm": 45.566081133100745, |
|
"learning_rate": 7.264843015879321e-07, |
|
"logits/chosen": -1.1421478986740112, |
|
"logits/rejected": -1.140625238418579, |
|
"logps/chosen": -0.9042370915412903, |
|
"logps/rejected": -1.7280666828155518, |
|
"loss": 1.541, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -9.04237174987793, |
|
"rewards/margins": 8.23829460144043, |
|
"rewards/rejected": -17.280664443969727, |
|
"semantic_entropy": 0.8745312690734863, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8415300546448088, |
|
"grad_norm": 59.00085660352214, |
|
"learning_rate": 7.17925180813725e-07, |
|
"logits/chosen": -1.2217355966567993, |
|
"logits/rejected": -1.159557580947876, |
|
"logps/chosen": -1.042198657989502, |
|
"logps/rejected": -2.1717679500579834, |
|
"loss": 1.7473, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -10.42198657989502, |
|
"rewards/margins": 11.295695304870605, |
|
"rewards/rejected": -21.717683792114258, |
|
"semantic_entropy": 0.8145696520805359, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8524590163934426, |
|
"grad_norm": 68.24919118342267, |
|
"learning_rate": 7.092864825346266e-07, |
|
"logits/chosen": -1.2256710529327393, |
|
"logits/rejected": -1.154592752456665, |
|
"logps/chosen": -0.8894011378288269, |
|
"logps/rejected": -2.0597283840179443, |
|
"loss": 1.5906, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -8.894010543823242, |
|
"rewards/margins": 11.703274726867676, |
|
"rewards/rejected": -20.597286224365234, |
|
"semantic_entropy": 0.8356989026069641, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8633879781420765, |
|
"grad_norm": 52.86840793380424, |
|
"learning_rate": 7.005713612555545e-07, |
|
"logits/chosen": -1.1973850727081299, |
|
"logits/rejected": -1.15791654586792, |
|
"logps/chosen": -0.9084303975105286, |
|
"logps/rejected": -1.824072241783142, |
|
"loss": 1.5811, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -9.084303855895996, |
|
"rewards/margins": 9.156417846679688, |
|
"rewards/rejected": -18.240720748901367, |
|
"semantic_entropy": 0.863986611366272, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"grad_norm": 54.969346083508704, |
|
"learning_rate": 6.917829993880302e-07, |
|
"logits/chosen": -1.1350136995315552, |
|
"logits/rejected": -1.078984022140503, |
|
"logps/chosen": -0.9205960035324097, |
|
"logps/rejected": -1.9763364791870117, |
|
"loss": 1.5778, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -9.205960273742676, |
|
"rewards/margins": 10.557405471801758, |
|
"rewards/rejected": -19.763364791870117, |
|
"semantic_entropy": 0.8187274932861328, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"eval_logits/chosen": -1.5077557563781738, |
|
"eval_logits/rejected": -1.432308554649353, |
|
"eval_logps/chosen": -0.868651807308197, |
|
"eval_logps/rejected": -1.8860282897949219, |
|
"eval_loss": 1.6372781991958618, |
|
"eval_rewards/accuracies": 0.8734939694404602, |
|
"eval_rewards/chosen": -8.686517715454102, |
|
"eval_rewards/margins": 10.173765182495117, |
|
"eval_rewards/rejected": -18.86028289794922, |
|
"eval_runtime": 37.7445, |
|
"eval_samples_per_second": 34.919, |
|
"eval_semantic_entropy": 0.8519198894500732, |
|
"eval_steps_per_second": 2.199, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8852459016393442, |
|
"grad_norm": 54.747379817385166, |
|
"learning_rate": 6.8292460608809e-07, |
|
"logits/chosen": -1.1865565776824951, |
|
"logits/rejected": -1.0789119005203247, |
|
"logps/chosen": -0.8656112551689148, |
|
"logps/rejected": -1.9079488515853882, |
|
"loss": 1.557, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -8.656112670898438, |
|
"rewards/margins": 10.423376083374023, |
|
"rewards/rejected": -19.07948875427246, |
|
"semantic_entropy": 0.8483451008796692, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8961748633879781, |
|
"grad_norm": 54.38709320329884, |
|
"learning_rate": 6.739994160844309e-07, |
|
"logits/chosen": -1.2001937627792358, |
|
"logits/rejected": -1.2109323740005493, |
|
"logps/chosen": -1.0198501348495483, |
|
"logps/rejected": -2.304253101348877, |
|
"loss": 1.5398, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -10.198502540588379, |
|
"rewards/margins": 12.844027519226074, |
|
"rewards/rejected": -23.042530059814453, |
|
"semantic_entropy": 0.7884197235107422, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.907103825136612, |
|
"grad_norm": 58.8994587847891, |
|
"learning_rate": 6.650106884972176e-07, |
|
"logits/chosen": -1.2297394275665283, |
|
"logits/rejected": -1.2055060863494873, |
|
"logps/chosen": -0.8097732663154602, |
|
"logps/rejected": -2.0647740364074707, |
|
"loss": 1.6318, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -8.097734451293945, |
|
"rewards/margins": 12.550005912780762, |
|
"rewards/rejected": -20.647741317749023, |
|
"semantic_entropy": 0.8577386736869812, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9180327868852459, |
|
"grad_norm": 66.32923235150443, |
|
"learning_rate": 6.559617056479827e-07, |
|
"logits/chosen": -1.2397379875183105, |
|
"logits/rejected": -1.1944515705108643, |
|
"logps/chosen": -0.9744995832443237, |
|
"logps/rejected": -2.2359464168548584, |
|
"loss": 1.5364, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -9.7449951171875, |
|
"rewards/margins": 12.614469528198242, |
|
"rewards/rejected": -22.359464645385742, |
|
"semantic_entropy": 0.8098868131637573, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9289617486338798, |
|
"grad_norm": 64.90064469639756, |
|
"learning_rate": 6.468557718610559e-07, |
|
"logits/chosen": -1.2209162712097168, |
|
"logits/rejected": -1.169478178024292, |
|
"logps/chosen": -1.0786913633346558, |
|
"logps/rejected": -2.5019688606262207, |
|
"loss": 1.6058, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -10.786913871765137, |
|
"rewards/margins": 14.232770919799805, |
|
"rewards/rejected": -25.019685745239258, |
|
"semantic_entropy": 0.7745442390441895, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9398907103825137, |
|
"grad_norm": 65.90460986634548, |
|
"learning_rate": 6.376962122569567e-07, |
|
"logits/chosen": -1.1558514833450317, |
|
"logits/rejected": -1.1550347805023193, |
|
"logps/chosen": -0.6848023533821106, |
|
"logps/rejected": -1.8477531671524048, |
|
"loss": 1.3787, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.848023414611816, |
|
"rewards/margins": 11.629508972167969, |
|
"rewards/rejected": -18.4775333404541, |
|
"semantic_entropy": 0.8978629112243652, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9508196721311475, |
|
"grad_norm": 80.36478809238143, |
|
"learning_rate": 6.284863715381948e-07, |
|
"logits/chosen": -1.2516933679580688, |
|
"logits/rejected": -1.2447582483291626, |
|
"logps/chosen": -0.8717735409736633, |
|
"logps/rejected": -2.2636890411376953, |
|
"loss": 1.5367, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -8.717737197875977, |
|
"rewards/margins": 13.919151306152344, |
|
"rewards/rejected": -22.63688850402832, |
|
"semantic_entropy": 0.8273345828056335, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9617486338797814, |
|
"grad_norm": 79.39000046120883, |
|
"learning_rate": 6.192296127679192e-07, |
|
"logits/chosen": -1.1874706745147705, |
|
"logits/rejected": -1.1192582845687866, |
|
"logps/chosen": -0.9044081568717957, |
|
"logps/rejected": -2.0115015506744385, |
|
"loss": 1.5428, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -9.04408073425293, |
|
"rewards/margins": 11.070935249328613, |
|
"rewards/rejected": -20.11501693725586, |
|
"semantic_entropy": 0.8257206082344055, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9726775956284153, |
|
"grad_norm": 59.45278594899511, |
|
"learning_rate": 6.099293161418629e-07, |
|
"logits/chosen": -1.2240984439849854, |
|
"logits/rejected": -1.18662428855896, |
|
"logps/chosen": -0.6975774168968201, |
|
"logps/rejected": -1.919647216796875, |
|
"loss": 1.5818, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -6.97577428817749, |
|
"rewards/margins": 12.220699310302734, |
|
"rewards/rejected": -19.196474075317383, |
|
"semantic_entropy": 0.887184739112854, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 53.56869631451961, |
|
"learning_rate": 6.005888777540319e-07, |
|
"logits/chosen": -1.1677896976470947, |
|
"logits/rejected": -1.1477397680282593, |
|
"logps/chosen": -0.8627035021781921, |
|
"logps/rejected": -1.9724452495574951, |
|
"loss": 1.5352, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -8.627036094665527, |
|
"rewards/margins": 11.097416877746582, |
|
"rewards/rejected": -19.72445297241211, |
|
"semantic_entropy": 0.8503534197807312, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.994535519125683, |
|
"grad_norm": 75.11227313091236, |
|
"learning_rate": 5.912117083565873e-07, |
|
"logits/chosen": -1.1938502788543701, |
|
"logits/rejected": -1.1654444932937622, |
|
"logps/chosen": -1.1713725328445435, |
|
"logps/rejected": -2.3690249919891357, |
|
"loss": 1.5941, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -11.713726043701172, |
|
"rewards/margins": 11.976524353027344, |
|
"rewards/rejected": -23.690250396728516, |
|
"semantic_entropy": 0.7848092913627625, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.005464480874317, |
|
"grad_norm": 48.44167575969943, |
|
"learning_rate": 5.818012321143773e-07, |
|
"logits/chosen": -1.2322055101394653, |
|
"logits/rejected": -1.1756855249404907, |
|
"logps/chosen": -0.8835703730583191, |
|
"logps/rejected": -2.2671618461608887, |
|
"loss": 1.3987, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -8.835702896118164, |
|
"rewards/margins": 13.835916519165039, |
|
"rewards/rejected": -22.671619415283203, |
|
"semantic_entropy": 0.8247418403625488, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0163934426229508, |
|
"grad_norm": 47.683623908009125, |
|
"learning_rate": 5.723608853545684e-07, |
|
"logits/chosen": -1.2683448791503906, |
|
"logits/rejected": -1.2093217372894287, |
|
"logps/chosen": -0.8307113647460938, |
|
"logps/rejected": -2.3884284496307373, |
|
"loss": 1.1472, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.307112693786621, |
|
"rewards/margins": 15.577173233032227, |
|
"rewards/rejected": -23.88428497314453, |
|
"semantic_entropy": 0.8331409692764282, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.0273224043715847, |
|
"grad_norm": 57.239399331005785, |
|
"learning_rate": 5.628941153118388e-07, |
|
"logits/chosen": -1.2552951574325562, |
|
"logits/rejected": -1.2222687005996704, |
|
"logps/chosen": -0.8629674911499023, |
|
"logps/rejected": -2.325558662414551, |
|
"loss": 1.1426, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -8.629674911499023, |
|
"rewards/margins": 14.625910758972168, |
|
"rewards/rejected": -23.25558853149414, |
|
"semantic_entropy": 0.8217577934265137, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0382513661202186, |
|
"grad_norm": 39.48804343487935, |
|
"learning_rate": 5.534043788695852e-07, |
|
"logits/chosen": -1.22693932056427, |
|
"logits/rejected": -1.1497706174850464, |
|
"logps/chosen": -0.7519802451133728, |
|
"logps/rejected": -2.1450114250183105, |
|
"loss": 1.0975, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.519803047180176, |
|
"rewards/margins": 13.930310249328613, |
|
"rewards/rejected": -21.450115203857422, |
|
"semantic_entropy": 0.8537012338638306, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.0491803278688525, |
|
"grad_norm": 37.024988536485964, |
|
"learning_rate": 5.438951412976098e-07, |
|
"logits/chosen": -1.3238413333892822, |
|
"logits/rejected": -1.2577579021453857, |
|
"logps/chosen": -0.7658538818359375, |
|
"logps/rejected": -2.0598320960998535, |
|
"loss": 1.1533, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -7.658538818359375, |
|
"rewards/margins": 12.939779281616211, |
|
"rewards/rejected": -20.598318099975586, |
|
"semantic_entropy": 0.8649771809577942, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0601092896174864, |
|
"grad_norm": 42.1526889978167, |
|
"learning_rate": 5.34369874986742e-07, |
|
"logits/chosen": -1.2668297290802002, |
|
"logits/rejected": -1.1939513683319092, |
|
"logps/chosen": -0.8974517583847046, |
|
"logps/rejected": -2.424004077911377, |
|
"loss": 1.0247, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -8.974517822265625, |
|
"rewards/margins": 15.265522956848145, |
|
"rewards/rejected": -24.24004364013672, |
|
"semantic_entropy": 0.7897659540176392, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.0710382513661203, |
|
"grad_norm": 52.525378226092165, |
|
"learning_rate": 5.248320581808619e-07, |
|
"logits/chosen": -1.2010338306427002, |
|
"logits/rejected": -1.1409817934036255, |
|
"logps/chosen": -0.7397095561027527, |
|
"logps/rejected": -2.3880066871643066, |
|
"loss": 1.1343, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.397095680236816, |
|
"rewards/margins": 16.48297119140625, |
|
"rewards/rejected": -23.88006591796875, |
|
"semantic_entropy": 0.8509289026260376, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0819672131147542, |
|
"grad_norm": 57.24209028140043, |
|
"learning_rate": 5.15285173706785e-07, |
|
"logits/chosen": -1.2966060638427734, |
|
"logits/rejected": -1.2440364360809326, |
|
"logps/chosen": -0.7074769139289856, |
|
"logps/rejected": -2.2080492973327637, |
|
"loss": 1.104, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.074769496917725, |
|
"rewards/margins": 15.00572681427002, |
|
"rewards/rejected": -22.080495834350586, |
|
"semantic_entropy": 0.862097442150116, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.092896174863388, |
|
"grad_norm": 60.20969441966712, |
|
"learning_rate": 5.057327077024744e-07, |
|
"logits/chosen": -1.31562340259552, |
|
"logits/rejected": -1.2055505514144897, |
|
"logps/chosen": -0.7696375846862793, |
|
"logps/rejected": -2.1600234508514404, |
|
"loss": 1.0776, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.696375370025635, |
|
"rewards/margins": 13.903857231140137, |
|
"rewards/rejected": -21.600234985351562, |
|
"semantic_entropy": 0.8503168821334839, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1038251366120218, |
|
"grad_norm": 39.37970422474807, |
|
"learning_rate": 4.961781483440433e-07, |
|
"logits/chosen": -1.2652629613876343, |
|
"logits/rejected": -1.155110239982605, |
|
"logps/chosen": -0.7121917009353638, |
|
"logps/rejected": -2.2156224250793457, |
|
"loss": 1.0684, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.1219162940979, |
|
"rewards/margins": 15.034309387207031, |
|
"rewards/rejected": -22.156227111816406, |
|
"semantic_entropy": 0.856345534324646, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.1147540983606556, |
|
"grad_norm": 53.63055077579748, |
|
"learning_rate": 4.866249845720132e-07, |
|
"logits/chosen": -1.2122000455856323, |
|
"logits/rejected": -1.1381186246871948, |
|
"logps/chosen": -0.7895854115486145, |
|
"logps/rejected": -2.1967644691467285, |
|
"loss": 1.1991, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -7.8958539962768555, |
|
"rewards/margins": 14.071792602539062, |
|
"rewards/rejected": -21.96764373779297, |
|
"semantic_entropy": 0.8369362950325012, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.1256830601092895, |
|
"grad_norm": 45.3883880528581, |
|
"learning_rate": 4.770767048172948e-07, |
|
"logits/chosen": -1.2122347354888916, |
|
"logits/rejected": -1.149927020072937, |
|
"logps/chosen": -0.7574501633644104, |
|
"logps/rejected": -2.262672185897827, |
|
"loss": 1.0855, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.574501037597656, |
|
"rewards/margins": 15.052220344543457, |
|
"rewards/rejected": -22.62672233581543, |
|
"semantic_entropy": 0.8394317626953125, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.1366120218579234, |
|
"grad_norm": 40.766203312385706, |
|
"learning_rate": 4.675367957273505e-07, |
|
"logits/chosen": -1.2204854488372803, |
|
"logits/rejected": -1.144971251487732, |
|
"logps/chosen": -0.7849557995796204, |
|
"logps/rejected": -2.2667272090911865, |
|
"loss": 1.0264, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -7.849558353424072, |
|
"rewards/margins": 14.817715644836426, |
|
"rewards/rejected": -22.66727066040039, |
|
"semantic_entropy": 0.8283472061157227, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.1475409836065573, |
|
"grad_norm": 42.8963401742162, |
|
"learning_rate": 4.5800874089301455e-07, |
|
"logits/chosen": -1.261281132698059, |
|
"logits/rejected": -1.1677086353302002, |
|
"logps/chosen": -0.7403801679611206, |
|
"logps/rejected": -2.290158987045288, |
|
"loss": 0.9619, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.403802394866943, |
|
"rewards/margins": 15.497787475585938, |
|
"rewards/rejected": -22.901592254638672, |
|
"semantic_entropy": 0.8431955575942993, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.1584699453551912, |
|
"grad_norm": 57.97538998117191, |
|
"learning_rate": 4.4849601957642285e-07, |
|
"logits/chosen": -1.174661636352539, |
|
"logits/rejected": -1.115818738937378, |
|
"logps/chosen": -0.7541646361351013, |
|
"logps/rejected": -2.2110159397125244, |
|
"loss": 1.0935, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -7.5416460037231445, |
|
"rewards/margins": 14.568511962890625, |
|
"rewards/rejected": -22.110157012939453, |
|
"semantic_entropy": 0.853602409362793, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.169398907103825, |
|
"grad_norm": 56.26607000357583, |
|
"learning_rate": 4.390021054405286e-07, |
|
"logits/chosen": -1.240636944770813, |
|
"logits/rejected": -1.1869792938232422, |
|
"logps/chosen": -0.7534674406051636, |
|
"logps/rejected": -2.2876932621002197, |
|
"loss": 0.9657, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -7.534674167633057, |
|
"rewards/margins": 15.342257499694824, |
|
"rewards/rejected": -22.87693214416504, |
|
"semantic_entropy": 0.8402601480484009, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.180327868852459, |
|
"grad_norm": 54.25638397035917, |
|
"learning_rate": 4.295304652806592e-07, |
|
"logits/chosen": -1.2079153060913086, |
|
"logits/rejected": -1.142287015914917, |
|
"logps/chosen": -0.611890971660614, |
|
"logps/rejected": -2.0176615715026855, |
|
"loss": 1.0051, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.11890983581543, |
|
"rewards/margins": 14.057706832885742, |
|
"rewards/rejected": -20.17661476135254, |
|
"semantic_entropy": 0.8606586456298828, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1912568306010929, |
|
"grad_norm": 44.34686440564056, |
|
"learning_rate": 4.200845577585826e-07, |
|
"logits/chosen": -1.2312743663787842, |
|
"logits/rejected": -1.1274607181549072, |
|
"logps/chosen": -0.6904948353767395, |
|
"logps/rejected": -2.0026180744171143, |
|
"loss": 1.0628, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.9049482345581055, |
|
"rewards/margins": 13.121232986450195, |
|
"rewards/rejected": -20.026180267333984, |
|
"semantic_entropy": 0.839868426322937, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.2021857923497268, |
|
"grad_norm": 51.975486510086114, |
|
"learning_rate": 4.106678321395433e-07, |
|
"logits/chosen": -1.1899176836013794, |
|
"logits/rejected": -1.1200889348983765, |
|
"logps/chosen": -0.7009586095809937, |
|
"logps/rejected": -2.399099826812744, |
|
"loss": 0.9114, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -7.009586334228516, |
|
"rewards/margins": 16.98141098022461, |
|
"rewards/rejected": -23.990997314453125, |
|
"semantic_entropy": 0.8362213373184204, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2131147540983607, |
|
"grad_norm": 39.51029900614786, |
|
"learning_rate": 4.012837270327288e-07, |
|
"logits/chosen": -1.1518226861953735, |
|
"logits/rejected": -1.1040208339691162, |
|
"logps/chosen": -0.6657946705818176, |
|
"logps/rejected": -2.024448871612549, |
|
"loss": 1.0111, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -6.6579461097717285, |
|
"rewards/margins": 13.586542129516602, |
|
"rewards/rejected": -20.244487762451172, |
|
"semantic_entropy": 0.8607606887817383, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.2240437158469946, |
|
"grad_norm": 47.0785790742371, |
|
"learning_rate": 3.9193566913562915e-07, |
|
"logits/chosen": -1.2187812328338623, |
|
"logits/rejected": -1.1253793239593506, |
|
"logps/chosen": -0.8078786730766296, |
|
"logps/rejected": -2.1750519275665283, |
|
"loss": 1.0263, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -8.078786849975586, |
|
"rewards/margins": 13.671732902526855, |
|
"rewards/rejected": -21.750518798828125, |
|
"semantic_entropy": 0.8194610476493835, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.2349726775956285, |
|
"grad_norm": 44.31080037447064, |
|
"learning_rate": 3.826270719827435e-07, |
|
"logits/chosen": -1.2184025049209595, |
|
"logits/rejected": -1.1244232654571533, |
|
"logps/chosen": -0.7781059741973877, |
|
"logps/rejected": -2.595242977142334, |
|
"loss": 1.0496, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -7.781059265136719, |
|
"rewards/margins": 18.171369552612305, |
|
"rewards/rejected": -25.952428817749023, |
|
"semantic_entropy": 0.8032097816467285, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.2459016393442623, |
|
"grad_norm": 57.680087176882985, |
|
"learning_rate": 3.7336133469909623e-07, |
|
"logits/chosen": -1.262069821357727, |
|
"logits/rejected": -1.203547477722168, |
|
"logps/chosen": -0.7461926341056824, |
|
"logps/rejected": -2.1672732830047607, |
|
"loss": 1.1028, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.461926460266113, |
|
"rewards/margins": 14.210809707641602, |
|
"rewards/rejected": -21.6727352142334, |
|
"semantic_entropy": 0.8577653169631958, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.2568306010928962, |
|
"grad_norm": 46.59857147414731, |
|
"learning_rate": 3.64141840759012e-07, |
|
"logits/chosen": -1.1375811100006104, |
|
"logits/rejected": -1.0560975074768066, |
|
"logps/chosen": -0.6888304948806763, |
|
"logps/rejected": -2.229635238647461, |
|
"loss": 0.9418, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.8883056640625, |
|
"rewards/margins": 15.408047676086426, |
|
"rewards/rejected": -22.29635238647461, |
|
"semantic_entropy": 0.8547189831733704, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.2677595628415301, |
|
"grad_norm": 70.16919238335676, |
|
"learning_rate": 3.549719567506076e-07, |
|
"logits/chosen": -1.1417677402496338, |
|
"logits/rejected": -1.1007084846496582, |
|
"logps/chosen": -0.746972918510437, |
|
"logps/rejected": -2.0715861320495605, |
|
"loss": 0.9986, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -7.469728946685791, |
|
"rewards/margins": 13.246131896972656, |
|
"rewards/rejected": -20.715862274169922, |
|
"semantic_entropy": 0.8440540432929993, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.278688524590164, |
|
"grad_norm": 39.105942102294286, |
|
"learning_rate": 3.4585503114644996e-07, |
|
"logits/chosen": -1.2692724466323853, |
|
"logits/rejected": -1.1571121215820312, |
|
"logps/chosen": -0.7609504461288452, |
|
"logps/rejected": -2.3702054023742676, |
|
"loss": 1.0065, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.609503746032715, |
|
"rewards/margins": 16.092552185058594, |
|
"rewards/rejected": -23.70205307006836, |
|
"semantic_entropy": 0.8199702501296997, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.289617486338798, |
|
"grad_norm": 35.90062312874268, |
|
"learning_rate": 3.3679439308082774e-07, |
|
"logits/chosen": -1.226792335510254, |
|
"logits/rejected": -1.176424264907837, |
|
"logps/chosen": -0.6281425356864929, |
|
"logps/rejected": -2.045499324798584, |
|
"loss": 0.9731, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -6.281425952911377, |
|
"rewards/margins": 14.173568725585938, |
|
"rewards/rejected": -20.454992294311523, |
|
"semantic_entropy": 0.8588122129440308, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.3005464480874318, |
|
"grad_norm": 52.95598574128885, |
|
"learning_rate": 3.2779335113408646e-07, |
|
"logits/chosen": -1.233185052871704, |
|
"logits/rejected": -1.1640207767486572, |
|
"logps/chosen": -0.7508488297462463, |
|
"logps/rejected": -2.4652957916259766, |
|
"loss": 1.0038, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.508486747741699, |
|
"rewards/margins": 17.14447021484375, |
|
"rewards/rejected": -24.6529598236084, |
|
"semantic_entropy": 0.8177651166915894, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.3114754098360657, |
|
"grad_norm": 40.0562568923892, |
|
"learning_rate": 3.1885519212446716e-07, |
|
"logits/chosen": -1.2854266166687012, |
|
"logits/rejected": -1.177534580230713, |
|
"logps/chosen": -0.6793255805969238, |
|
"logps/rejected": -2.2706198692321777, |
|
"loss": 0.9506, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -6.7932562828063965, |
|
"rewards/margins": 15.912942886352539, |
|
"rewards/rejected": -22.70619773864746, |
|
"semantic_entropy": 0.8524688482284546, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3224043715846996, |
|
"grad_norm": 56.41638001057574, |
|
"learning_rate": 3.0998317990789376e-07, |
|
"logits/chosen": -1.2670646905899048, |
|
"logits/rejected": -1.171144962310791, |
|
"logps/chosen": -0.6692796349525452, |
|
"logps/rejected": -1.934456467628479, |
|
"loss": 1.0026, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.692796230316162, |
|
"rewards/margins": 12.65176773071289, |
|
"rewards/rejected": -19.34456443786621, |
|
"semantic_entropy": 0.869337260723114, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 47.30494559887427, |
|
"learning_rate": 3.0118055418614295e-07, |
|
"logits/chosen": -1.3104336261749268, |
|
"logits/rejected": -1.213578224182129, |
|
"logps/chosen": -0.8171396255493164, |
|
"logps/rejected": -2.5085349082946777, |
|
"loss": 0.9846, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -8.17139720916748, |
|
"rewards/margins": 16.91395378112793, |
|
"rewards/rejected": -25.085350036621094, |
|
"semantic_entropy": 0.7933089733123779, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3442622950819672, |
|
"grad_norm": 55.65069108222119, |
|
"learning_rate": 2.9245052932383707e-07, |
|
"logits/chosen": -1.2602143287658691, |
|
"logits/rejected": -1.1212416887283325, |
|
"logps/chosen": -0.7733426094055176, |
|
"logps/rejected": -2.3373031616210938, |
|
"loss": 1.0585, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.733426570892334, |
|
"rewards/margins": 15.639605522155762, |
|
"rewards/rejected": -23.37303352355957, |
|
"semantic_entropy": 0.8259070515632629, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.355191256830601, |
|
"grad_norm": 41.83594828022189, |
|
"learning_rate": 2.83796293174686e-07, |
|
"logits/chosen": -1.1642497777938843, |
|
"logits/rejected": -1.0947132110595703, |
|
"logps/chosen": -0.7484847903251648, |
|
"logps/rejected": -2.3808321952819824, |
|
"loss": 1.0132, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.4848480224609375, |
|
"rewards/margins": 16.323474884033203, |
|
"rewards/rejected": -23.808320999145508, |
|
"semantic_entropy": 0.8322114944458008, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.366120218579235, |
|
"grad_norm": 45.85253729227267, |
|
"learning_rate": 2.7522100591741217e-07, |
|
"logits/chosen": -1.234703779220581, |
|
"logits/rejected": -1.1591752767562866, |
|
"logps/chosen": -0.6658716201782227, |
|
"logps/rejected": -2.3456645011901855, |
|
"loss": 0.9989, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -6.658716678619385, |
|
"rewards/margins": 16.797927856445312, |
|
"rewards/rejected": -23.45664405822754, |
|
"semantic_entropy": 0.8470379710197449, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.3770491803278688, |
|
"grad_norm": 47.20204057866064, |
|
"learning_rate": 2.6672779890178046e-07, |
|
"logits/chosen": -1.163450002670288, |
|
"logits/rejected": -1.0469523668289185, |
|
"logps/chosen": -0.7807295918464661, |
|
"logps/rejected": -2.2187490463256836, |
|
"loss": 1.0123, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.807295322418213, |
|
"rewards/margins": 14.380197525024414, |
|
"rewards/rejected": -22.1874942779541, |
|
"semantic_entropy": 0.829529881477356, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.3879781420765027, |
|
"grad_norm": 48.43553604807009, |
|
"learning_rate": 2.5831977350515454e-07, |
|
"logits/chosen": -1.1149486303329468, |
|
"logits/rejected": -1.0645884275436401, |
|
"logps/chosen": -0.7764806747436523, |
|
"logps/rejected": -2.346562385559082, |
|
"loss": 1.0361, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.764806270599365, |
|
"rewards/margins": 15.700818061828613, |
|
"rewards/rejected": -23.465625762939453, |
|
"semantic_entropy": 0.8258574604988098, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.3989071038251366, |
|
"grad_norm": 50.20523377491874, |
|
"learning_rate": 2.500000000000001e-07, |
|
"logits/chosen": -1.2106841802597046, |
|
"logits/rejected": -1.164466142654419, |
|
"logps/chosen": -0.7233768105506897, |
|
"logps/rejected": -2.620008945465088, |
|
"loss": 0.932, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -7.233767032623291, |
|
"rewards/margins": 18.966323852539062, |
|
"rewards/rejected": -26.200092315673828, |
|
"semantic_entropy": 0.8185870051383972, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.4098360655737705, |
|
"grad_norm": 50.15293641915176, |
|
"learning_rate": 2.4177151643274307e-07, |
|
"logits/chosen": -1.1696977615356445, |
|
"logits/rejected": -1.112188458442688, |
|
"logps/chosen": -0.7105950117111206, |
|
"logps/rejected": -2.4047422409057617, |
|
"loss": 0.9626, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.105950832366943, |
|
"rewards/margins": 16.941471099853516, |
|
"rewards/rejected": -24.047422409057617, |
|
"semantic_entropy": 0.8116961717605591, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.4207650273224044, |
|
"grad_norm": 52.051060632639825, |
|
"learning_rate": 2.3363732751439923e-07, |
|
"logits/chosen": -1.2659627199172974, |
|
"logits/rejected": -1.178022027015686, |
|
"logps/chosen": -0.7824967503547668, |
|
"logps/rejected": -2.2903237342834473, |
|
"loss": 1.0342, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.824967861175537, |
|
"rewards/margins": 15.078269958496094, |
|
"rewards/rejected": -22.903236389160156, |
|
"semantic_entropy": 0.8222282528877258, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.4316939890710383, |
|
"grad_norm": 104.74662245786296, |
|
"learning_rate": 2.2560040352337307e-07, |
|
"logits/chosen": -1.1930986642837524, |
|
"logits/rejected": -1.0961310863494873, |
|
"logps/chosen": -0.8049964904785156, |
|
"logps/rejected": -2.6303577423095703, |
|
"loss": 1.0368, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -8.049962997436523, |
|
"rewards/margins": 18.253612518310547, |
|
"rewards/rejected": -26.303577423095703, |
|
"semantic_entropy": 0.8019247055053711, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.4426229508196722, |
|
"grad_norm": 74.14915143886914, |
|
"learning_rate": 2.1766367922083283e-07, |
|
"logits/chosen": -1.2195419073104858, |
|
"logits/rejected": -1.1510334014892578, |
|
"logps/chosen": -0.7229866981506348, |
|
"logps/rejected": -2.4508605003356934, |
|
"loss": 0.9204, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.229866981506348, |
|
"rewards/margins": 17.278736114501953, |
|
"rewards/rejected": -24.508602142333984, |
|
"semantic_entropy": 0.8276729583740234, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.453551912568306, |
|
"grad_norm": 40.08916656671079, |
|
"learning_rate": 2.0983005277905347e-07, |
|
"logits/chosen": -1.25788152217865, |
|
"logits/rejected": -1.1829631328582764, |
|
"logps/chosen": -0.7363836765289307, |
|
"logps/rejected": -2.4085285663604736, |
|
"loss": 0.9793, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.363836765289307, |
|
"rewards/margins": 16.721446990966797, |
|
"rewards/rejected": -24.085285186767578, |
|
"semantic_entropy": 0.8287376165390015, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.46448087431694, |
|
"grad_norm": 47.3989204733329, |
|
"learning_rate": 2.021023847231202e-07, |
|
"logits/chosen": -1.2234550714492798, |
|
"logits/rejected": -1.1443179845809937, |
|
"logps/chosen": -0.7974756956100464, |
|
"logps/rejected": -2.3043999671936035, |
|
"loss": 0.9905, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.974755764007568, |
|
"rewards/margins": 15.069241523742676, |
|
"rewards/rejected": -23.043996810913086, |
|
"semantic_entropy": 0.8342604637145996, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.4754098360655736, |
|
"grad_norm": 108.78018960923754, |
|
"learning_rate": 1.94483496886381e-07, |
|
"logits/chosen": -1.1683439016342163, |
|
"logits/rejected": -1.1087901592254639, |
|
"logps/chosen": -0.6944879293441772, |
|
"logps/rejected": -2.433687925338745, |
|
"loss": 0.8989, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.944879055023193, |
|
"rewards/margins": 17.391998291015625, |
|
"rewards/rejected": -24.33687973022461, |
|
"semantic_entropy": 0.8319599032402039, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.4863387978142075, |
|
"grad_norm": 60.19474027091482, |
|
"learning_rate": 1.869761713800254e-07, |
|
"logits/chosen": -1.2412843704223633, |
|
"logits/rejected": -1.1452839374542236, |
|
"logps/chosen": -0.831190288066864, |
|
"logps/rejected": -2.4966881275177, |
|
"loss": 1.0112, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -8.31190299987793, |
|
"rewards/margins": 16.654979705810547, |
|
"rewards/rejected": -24.966880798339844, |
|
"semantic_entropy": 0.800665020942688, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.4972677595628414, |
|
"grad_norm": 45.288193362387666, |
|
"learning_rate": 1.7958314957717064e-07, |
|
"logits/chosen": -1.2326924800872803, |
|
"logits/rejected": -1.1884281635284424, |
|
"logps/chosen": -0.6524280309677124, |
|
"logps/rejected": -2.181318998336792, |
|
"loss": 0.9979, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -6.524280548095703, |
|
"rewards/margins": 15.288909912109375, |
|
"rewards/rejected": -21.813190460205078, |
|
"semantic_entropy": 0.8463915586471558, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.5081967213114753, |
|
"grad_norm": 77.54652900736652, |
|
"learning_rate": 1.7230713111182164e-07, |
|
"logits/chosen": -1.2749425172805786, |
|
"logits/rejected": -1.1991561651229858, |
|
"logps/chosen": -0.6433757543563843, |
|
"logps/rejected": -2.4266154766082764, |
|
"loss": 0.9611, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.433757781982422, |
|
"rewards/margins": 17.8323974609375, |
|
"rewards/rejected": -24.266155242919922, |
|
"semantic_entropy": 0.8604837656021118, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.5191256830601092, |
|
"grad_norm": 45.5200345735269, |
|
"learning_rate": 1.651507728930739e-07, |
|
"logits/chosen": -1.1950256824493408, |
|
"logits/rejected": -1.131256103515625, |
|
"logps/chosen": -0.6931561231613159, |
|
"logps/rejected": -2.161853551864624, |
|
"loss": 0.9934, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -6.931561470031738, |
|
"rewards/margins": 14.686975479125977, |
|
"rewards/rejected": -21.61853790283203, |
|
"semantic_entropy": 0.8436753153800964, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.530054644808743, |
|
"grad_norm": 49.242008834049685, |
|
"learning_rate": 1.5811668813491696e-07, |
|
"logits/chosen": -1.3293455839157104, |
|
"logits/rejected": -1.2231751680374146, |
|
"logps/chosen": -0.7694125771522522, |
|
"logps/rejected": -2.4189977645874023, |
|
"loss": 0.978, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -7.694125175476074, |
|
"rewards/margins": 16.495851516723633, |
|
"rewards/rejected": -24.189977645874023, |
|
"semantic_entropy": 0.8082691431045532, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.540983606557377, |
|
"grad_norm": 44.65399870377938, |
|
"learning_rate": 1.5120744540199343e-07, |
|
"logits/chosen": -1.2114274501800537, |
|
"logits/rejected": -1.1308143138885498, |
|
"logps/chosen": -0.7381525635719299, |
|
"logps/rejected": -2.3527631759643555, |
|
"loss": 0.9314, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -7.381524562835693, |
|
"rewards/margins": 16.146106719970703, |
|
"rewards/rejected": -23.527631759643555, |
|
"semantic_entropy": 0.8333342671394348, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.5519125683060109, |
|
"grad_norm": 52.47246084045148, |
|
"learning_rate": 1.4442556767166369e-07, |
|
"logits/chosen": -1.2004725933074951, |
|
"logits/rejected": -1.1394346952438354, |
|
"logps/chosen": -0.7631191611289978, |
|
"logps/rejected": -2.4908859729766846, |
|
"loss": 1.0138, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -7.631192207336426, |
|
"rewards/margins": 17.277666091918945, |
|
"rewards/rejected": -24.908855438232422, |
|
"semantic_entropy": 0.8088520169258118, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.5628415300546448, |
|
"grad_norm": 39.161062372274245, |
|
"learning_rate": 1.377735314127148e-07, |
|
"logits/chosen": -1.1989295482635498, |
|
"logits/rejected": -1.0892112255096436, |
|
"logps/chosen": -0.754266083240509, |
|
"logps/rejected": -2.3557486534118652, |
|
"loss": 0.9097, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.542661190032959, |
|
"rewards/margins": 16.014827728271484, |
|
"rewards/rejected": -23.5574893951416, |
|
"semantic_entropy": 0.8200591206550598, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.5737704918032787, |
|
"grad_norm": 57.53753951235613, |
|
"learning_rate": 1.312537656810549e-07, |
|
"logits/chosen": -1.1801402568817139, |
|
"logits/rejected": -1.1305280923843384, |
|
"logps/chosen": -0.8796719312667847, |
|
"logps/rejected": -2.6609649658203125, |
|
"loss": 1.0603, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.79671859741211, |
|
"rewards/margins": 17.812931060791016, |
|
"rewards/rejected": -26.609649658203125, |
|
"semantic_entropy": 0.7918781042098999, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.5846994535519126, |
|
"grad_norm": 51.68795375166876, |
|
"learning_rate": 1.2486865123271866e-07, |
|
"logits/chosen": -1.2510040998458862, |
|
"logits/rejected": -1.1513909101486206, |
|
"logps/chosen": -0.7905360460281372, |
|
"logps/rejected": -2.450331449508667, |
|
"loss": 0.988, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.905358791351318, |
|
"rewards/margins": 16.59795570373535, |
|
"rewards/rejected": -24.503314971923828, |
|
"semantic_entropy": 0.811559796333313, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.5956284153005464, |
|
"grad_norm": 53.36439634728435, |
|
"learning_rate": 1.1862051965451214e-07, |
|
"logits/chosen": -1.2445173263549805, |
|
"logits/rejected": -1.1288838386535645, |
|
"logps/chosen": -0.7035760283470154, |
|
"logps/rejected": -2.4538397789001465, |
|
"loss": 0.9645, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.035760402679443, |
|
"rewards/margins": 17.502635955810547, |
|
"rewards/rejected": -24.53839683532715, |
|
"semantic_entropy": 0.8314288258552551, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.6065573770491803, |
|
"grad_norm": 52.77186710891425, |
|
"learning_rate": 1.1251165251261047e-07, |
|
"logits/chosen": -1.1849864721298218, |
|
"logits/rejected": -1.111053466796875, |
|
"logps/chosen": -0.6819809675216675, |
|
"logps/rejected": -2.3596489429473877, |
|
"loss": 0.9183, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.819809913635254, |
|
"rewards/margins": 16.77667999267578, |
|
"rewards/rejected": -23.596487045288086, |
|
"semantic_entropy": 0.8518983721733093, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.6174863387978142, |
|
"grad_norm": 51.04954161674348, |
|
"learning_rate": 1.0654428051942138e-07, |
|
"logits/chosen": -1.185575246810913, |
|
"logits/rejected": -1.1258459091186523, |
|
"logps/chosen": -0.8496238589286804, |
|
"logps/rejected": -2.4404985904693604, |
|
"loss": 1.0108, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -8.49623966217041, |
|
"rewards/margins": 15.908746719360352, |
|
"rewards/rejected": -24.404987335205078, |
|
"semantic_entropy": 0.8217931985855103, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.6284153005464481, |
|
"grad_norm": 44.78590940359996, |
|
"learning_rate": 1.0072058271901978e-07, |
|
"logits/chosen": -1.1844556331634521, |
|
"logits/rejected": -1.096343994140625, |
|
"logps/chosen": -0.7650187611579895, |
|
"logps/rejected": -2.4417996406555176, |
|
"loss": 0.9889, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.6501874923706055, |
|
"rewards/margins": 16.767807006835938, |
|
"rewards/rejected": -24.41799545288086, |
|
"semantic_entropy": 0.8134136199951172, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.639344262295082, |
|
"grad_norm": 41.35825995367568, |
|
"learning_rate": 9.504268569144763e-08, |
|
"logits/chosen": -1.2524887323379517, |
|
"logits/rejected": -1.1518092155456543, |
|
"logps/chosen": -0.6517141461372375, |
|
"logps/rejected": -2.495558977127075, |
|
"loss": 0.9019, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.517141819000244, |
|
"rewards/margins": 18.438446044921875, |
|
"rewards/rejected": -24.95558738708496, |
|
"semantic_entropy": 0.8249934911727905, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.650273224043716, |
|
"grad_norm": 49.17981910139592, |
|
"learning_rate": 8.951266277617325e-08, |
|
"logits/chosen": -1.174800992012024, |
|
"logits/rejected": -1.0904661417007446, |
|
"logps/chosen": -0.6784438490867615, |
|
"logps/rejected": -2.281085968017578, |
|
"loss": 0.9285, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.784438133239746, |
|
"rewards/margins": 16.026418685913086, |
|
"rewards/rejected": -22.81085968017578, |
|
"semantic_entropy": 0.8071689605712891, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.6612021857923498, |
|
"grad_norm": 55.44235074285089, |
|
"learning_rate": 8.413253331499049e-08, |
|
"logits/chosen": -1.2523894309997559, |
|
"logits/rejected": -1.1709582805633545, |
|
"logps/chosen": -0.7902460694313049, |
|
"logps/rejected": -2.353731155395508, |
|
"loss": 0.9701, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.90246057510376, |
|
"rewards/margins": 15.634851455688477, |
|
"rewards/rejected": -23.537311553955078, |
|
"semantic_entropy": 0.8497117757797241, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.6721311475409837, |
|
"grad_norm": 46.939667617709574, |
|
"learning_rate": 7.8904261914637e-08, |
|
"logits/chosen": -1.2579504251480103, |
|
"logits/rejected": -1.2005599737167358, |
|
"logps/chosen": -0.7765697240829468, |
|
"logps/rejected": -2.3420188426971436, |
|
"loss": 1.0131, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -7.765698432922363, |
|
"rewards/margins": 15.654492378234863, |
|
"rewards/rejected": -23.420190811157227, |
|
"semantic_entropy": 0.8250833749771118, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.6830601092896176, |
|
"grad_norm": 67.60864064581558, |
|
"learning_rate": 7.382975772939865e-08, |
|
"logits/chosen": -1.2617108821868896, |
|
"logits/rejected": -1.2064878940582275, |
|
"logps/chosen": -0.7011424899101257, |
|
"logps/rejected": -2.4052655696868896, |
|
"loss": 0.9795, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.011425971984863, |
|
"rewards/margins": 17.04123306274414, |
|
"rewards/rejected": -24.052656173706055, |
|
"semantic_entropy": 0.8459088206291199, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.6939890710382515, |
|
"grad_norm": 68.25252330535682, |
|
"learning_rate": 6.891087376396315e-08, |
|
"logits/chosen": -1.1619203090667725, |
|
"logits/rejected": -1.1151115894317627, |
|
"logps/chosen": -0.6944946050643921, |
|
"logps/rejected": -2.123880624771118, |
|
"loss": 1.0529, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.9449462890625, |
|
"rewards/margins": 14.293858528137207, |
|
"rewards/rejected": -21.23880386352539, |
|
"semantic_entropy": 0.8555929064750671, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.7049180327868854, |
|
"grad_norm": 58.94990698167926, |
|
"learning_rate": 6.414940619677734e-08, |
|
"logits/chosen": -1.21394944190979, |
|
"logits/rejected": -1.148568034172058, |
|
"logps/chosen": -0.7798916697502136, |
|
"logps/rejected": -2.334639072418213, |
|
"loss": 1.0831, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.798917293548584, |
|
"rewards/margins": 15.547472953796387, |
|
"rewards/rejected": -23.346389770507812, |
|
"semantic_entropy": 0.8230711221694946, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.7158469945355193, |
|
"grad_norm": 54.978556677024066, |
|
"learning_rate": 5.954709372415523e-08, |
|
"logits/chosen": -1.2210636138916016, |
|
"logits/rejected": -1.134007453918457, |
|
"logps/chosen": -0.8276329040527344, |
|
"logps/rejected": -2.5226263999938965, |
|
"loss": 1.0036, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -8.27632999420166, |
|
"rewards/margins": 16.949934005737305, |
|
"rewards/rejected": -25.22626304626465, |
|
"semantic_entropy": 0.8026347160339355, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.7267759562841531, |
|
"grad_norm": 58.44169076386046, |
|
"learning_rate": 5.5105616925376296e-08, |
|
"logits/chosen": -1.3411870002746582, |
|
"logits/rejected": -1.1771245002746582, |
|
"logps/chosen": -0.7094103097915649, |
|
"logps/rejected": -2.3087127208709717, |
|
"loss": 0.9863, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.094101905822754, |
|
"rewards/margins": 15.993026733398438, |
|
"rewards/rejected": -23.08713150024414, |
|
"semantic_entropy": 0.8216876983642578, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.737704918032787, |
|
"grad_norm": 53.528578956238725, |
|
"learning_rate": 5.082659764900482e-08, |
|
"logits/chosen": -1.2835462093353271, |
|
"logits/rejected": -1.2009773254394531, |
|
"logps/chosen": -0.6398060917854309, |
|
"logps/rejected": -2.0710248947143555, |
|
"loss": 1.0059, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.3980607986450195, |
|
"rewards/margins": 14.312187194824219, |
|
"rewards/rejected": -20.710247039794922, |
|
"semantic_entropy": 0.8597515225410461, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.748633879781421, |
|
"grad_norm": 59.57829603969701, |
|
"learning_rate": 4.6711598420656976e-08, |
|
"logits/chosen": -1.2482662200927734, |
|
"logits/rejected": -1.1601988077163696, |
|
"logps/chosen": -0.7208329439163208, |
|
"logps/rejected": -2.314363956451416, |
|
"loss": 0.9552, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.208329200744629, |
|
"rewards/margins": 15.935308456420898, |
|
"rewards/rejected": -23.143640518188477, |
|
"semantic_entropy": 0.8409850001335144, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.748633879781421, |
|
"eval_logits/chosen": -1.5359925031661987, |
|
"eval_logits/rejected": -1.4433752298355103, |
|
"eval_logps/chosen": -0.8280417323112488, |
|
"eval_logps/rejected": -2.125033140182495, |
|
"eval_loss": 1.4401862621307373, |
|
"eval_rewards/accuracies": 0.8795180916786194, |
|
"eval_rewards/chosen": -8.280416488647461, |
|
"eval_rewards/margins": 12.969916343688965, |
|
"eval_rewards/rejected": -21.25033187866211, |
|
"eval_runtime": 33.6039, |
|
"eval_samples_per_second": 39.222, |
|
"eval_semantic_entropy": 0.8376908898353577, |
|
"eval_steps_per_second": 2.47, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7595628415300546, |
|
"grad_norm": 44.1368033825106, |
|
"learning_rate": 4.2762121872428615e-08, |
|
"logits/chosen": -1.2641065120697021, |
|
"logits/rejected": -1.2107889652252197, |
|
"logps/chosen": -0.6843208074569702, |
|
"logps/rejected": -2.0283682346343994, |
|
"loss": 1.0256, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -6.843208312988281, |
|
"rewards/margins": 13.440475463867188, |
|
"rewards/rejected": -20.28368377685547, |
|
"semantic_entropy": 0.8609482645988464, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.7704918032786885, |
|
"grad_norm": 60.8116220315975, |
|
"learning_rate": 3.897961019419516e-08, |
|
"logits/chosen": -1.242765188217163, |
|
"logits/rejected": -1.111221194267273, |
|
"logps/chosen": -0.6914607882499695, |
|
"logps/rejected": -2.5515542030334473, |
|
"loss": 1.026, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.914607048034668, |
|
"rewards/margins": 18.600933074951172, |
|
"rewards/rejected": -25.515541076660156, |
|
"semantic_entropy": 0.8368776440620422, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.7814207650273224, |
|
"grad_norm": 48.041636594141835, |
|
"learning_rate": 3.536544460698143e-08, |
|
"logits/chosen": -1.2581889629364014, |
|
"logits/rejected": -1.2215464115142822, |
|
"logps/chosen": -0.7543720006942749, |
|
"logps/rejected": -2.438751220703125, |
|
"loss": 1.0363, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.5437211990356445, |
|
"rewards/margins": 16.843791961669922, |
|
"rewards/rejected": -24.387516021728516, |
|
"semantic_entropy": 0.8024908304214478, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.7923497267759563, |
|
"grad_norm": 46.211811466738105, |
|
"learning_rate": 3.192094485859526e-08, |
|
"logits/chosen": -1.2139607667922974, |
|
"logits/rejected": -1.1563109159469604, |
|
"logps/chosen": -0.7942629456520081, |
|
"logps/rejected": -2.2374846935272217, |
|
"loss": 0.9534, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -7.942629337310791, |
|
"rewards/margins": 14.432218551635742, |
|
"rewards/rejected": -22.374849319458008, |
|
"semantic_entropy": 0.8313804864883423, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.8032786885245902, |
|
"grad_norm": 51.498446681364456, |
|
"learning_rate": 2.8647368741709367e-08, |
|
"logits/chosen": -1.307348608970642, |
|
"logits/rejected": -1.172135353088379, |
|
"logps/chosen": -0.8334323167800903, |
|
"logps/rejected": -2.4974188804626465, |
|
"loss": 0.9931, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -8.334322929382324, |
|
"rewards/margins": 16.63986587524414, |
|
"rewards/rejected": -24.974185943603516, |
|
"semantic_entropy": 0.7853243350982666, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.814207650273224, |
|
"grad_norm": 65.74184699216715, |
|
"learning_rate": 2.5545911634565265e-08, |
|
"logits/chosen": -1.2999436855316162, |
|
"logits/rejected": -1.1716783046722412, |
|
"logps/chosen": -0.7435690760612488, |
|
"logps/rejected": -2.767209529876709, |
|
"loss": 0.9785, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.435690402984619, |
|
"rewards/margins": 20.236404418945312, |
|
"rewards/rejected": -27.672094345092773, |
|
"semantic_entropy": 0.8089765310287476, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.825136612021858, |
|
"grad_norm": 53.69755289327063, |
|
"learning_rate": 2.261770606446983e-08, |
|
"logits/chosen": -1.3077576160430908, |
|
"logits/rejected": -1.2317638397216797, |
|
"logps/chosen": -0.7318333387374878, |
|
"logps/rejected": -1.9953196048736572, |
|
"loss": 0.9652, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.318333625793457, |
|
"rewards/margins": 12.634860038757324, |
|
"rewards/rejected": -19.95319366455078, |
|
"semantic_entropy": 0.8394795656204224, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.8360655737704918, |
|
"grad_norm": 48.57345276974053, |
|
"learning_rate": 1.9863821294241522e-08, |
|
"logits/chosen": -1.2126185894012451, |
|
"logits/rejected": -1.10856032371521, |
|
"logps/chosen": -0.7022706866264343, |
|
"logps/rejected": -2.3867998123168945, |
|
"loss": 0.9824, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.022706508636475, |
|
"rewards/margins": 16.84528923034668, |
|
"rewards/rejected": -23.86799430847168, |
|
"semantic_entropy": 0.8377873301506042, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.8469945355191257, |
|
"grad_norm": 47.58974510921998, |
|
"learning_rate": 1.7285262931759082e-08, |
|
"logits/chosen": -1.170081615447998, |
|
"logits/rejected": -1.1226613521575928, |
|
"logps/chosen": -0.709827721118927, |
|
"logps/rejected": -2.499692440032959, |
|
"loss": 1.0049, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.0982770919799805, |
|
"rewards/margins": 17.89864730834961, |
|
"rewards/rejected": -24.99692726135254, |
|
"semantic_entropy": 0.8213443756103516, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.8579234972677594, |
|
"grad_norm": 43.861213908082526, |
|
"learning_rate": 1.4882972562753615e-08, |
|
"logits/chosen": -1.2278581857681274, |
|
"logits/rejected": -1.1186041831970215, |
|
"logps/chosen": -0.6293253898620605, |
|
"logps/rejected": -2.4325814247131348, |
|
"loss": 0.9317, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.293253421783447, |
|
"rewards/margins": 18.032560348510742, |
|
"rewards/rejected": -24.325815200805664, |
|
"semantic_entropy": 0.8304460644721985, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8688524590163933, |
|
"grad_norm": 46.25639636622948, |
|
"learning_rate": 1.2657827406979404e-08, |
|
"logits/chosen": -1.2755509614944458, |
|
"logits/rejected": -1.1995421648025513, |
|
"logps/chosen": -0.7046025991439819, |
|
"logps/rejected": -2.2888636589050293, |
|
"loss": 0.9631, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.046026706695557, |
|
"rewards/margins": 15.842610359191895, |
|
"rewards/rejected": -22.88863754272461, |
|
"semantic_entropy": 0.8367988467216492, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.8797814207650272, |
|
"grad_norm": 43.742641732125044, |
|
"learning_rate": 1.0610639997888915e-08, |
|
"logits/chosen": -1.144809603691101, |
|
"logits/rejected": -1.0996748208999634, |
|
"logps/chosen": -0.6617113947868347, |
|
"logps/rejected": -2.071277141571045, |
|
"loss": 0.9799, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.6171135902404785, |
|
"rewards/margins": 14.095659255981445, |
|
"rewards/rejected": -20.712770462036133, |
|
"semantic_entropy": 0.8550912141799927, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.890710382513661, |
|
"grad_norm": 43.8556750169825, |
|
"learning_rate": 8.742157885927804e-09, |
|
"logits/chosen": -1.264917016029358, |
|
"logits/rejected": -1.1865818500518799, |
|
"logps/chosen": -0.7975755333900452, |
|
"logps/rejected": -2.4832332134246826, |
|
"loss": 0.9288, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.975754737854004, |
|
"rewards/margins": 16.856576919555664, |
|
"rewards/rejected": -24.832332611083984, |
|
"semantic_entropy": 0.8138486742973328, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.901639344262295, |
|
"grad_norm": 49.4352171489155, |
|
"learning_rate": 7.053063365559997e-09, |
|
"logits/chosen": -1.2424798011779785, |
|
"logits/rejected": -1.1954628229141235, |
|
"logps/chosen": -0.6465862393379211, |
|
"logps/rejected": -2.410433769226074, |
|
"loss": 0.8832, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -6.465862274169922, |
|
"rewards/margins": 17.638477325439453, |
|
"rewards/rejected": -24.104337692260742, |
|
"semantic_entropy": 0.8361645936965942, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.9125683060109289, |
|
"grad_norm": 79.71459811079978, |
|
"learning_rate": 5.543973226120935e-09, |
|
"logits/chosen": -1.2222373485565186, |
|
"logits/rejected": -1.1502609252929688, |
|
"logps/chosen": -0.7222265005111694, |
|
"logps/rejected": -2.1863186359405518, |
|
"loss": 0.9862, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.222264289855957, |
|
"rewards/margins": 14.640920639038086, |
|
"rewards/rejected": -21.86318588256836, |
|
"semantic_entropy": 0.8562089800834656, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.9234972677595628, |
|
"grad_norm": 53.36096318687935, |
|
"learning_rate": 4.215438526591064e-09, |
|
"logits/chosen": -1.2770297527313232, |
|
"logits/rejected": -1.2093579769134521, |
|
"logps/chosen": -0.6959497332572937, |
|
"logps/rejected": -2.2840352058410645, |
|
"loss": 0.9871, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.959497928619385, |
|
"rewards/margins": 15.880853652954102, |
|
"rewards/rejected": -22.840351104736328, |
|
"semantic_entropy": 0.849157452583313, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.9344262295081966, |
|
"grad_norm": 38.81388371132877, |
|
"learning_rate": 3.0679443943712467e-09, |
|
"logits/chosen": -1.3255574703216553, |
|
"logits/rejected": -1.2370083332061768, |
|
"logps/chosen": -0.7685250639915466, |
|
"logps/rejected": -2.3793647289276123, |
|
"loss": 0.9499, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.685250759124756, |
|
"rewards/margins": 16.108396530151367, |
|
"rewards/rejected": -23.79364585876465, |
|
"semantic_entropy": 0.8148989677429199, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.9453551912568305, |
|
"grad_norm": 43.52784854249128, |
|
"learning_rate": 2.1019098481337426e-09, |
|
"logits/chosen": -1.271645188331604, |
|
"logits/rejected": -1.1847755908966064, |
|
"logps/chosen": -0.7262202501296997, |
|
"logps/rejected": -2.480203151702881, |
|
"loss": 0.9648, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -7.262202262878418, |
|
"rewards/margins": 17.53982925415039, |
|
"rewards/rejected": -24.802032470703125, |
|
"semantic_entropy": 0.8072282671928406, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.9562841530054644, |
|
"grad_norm": 54.276319170574524, |
|
"learning_rate": 1.3176876448135477e-09, |
|
"logits/chosen": -1.311767816543579, |
|
"logits/rejected": -1.1933305263519287, |
|
"logps/chosen": -0.8360783457756042, |
|
"logps/rejected": -2.5562148094177246, |
|
"loss": 1.0277, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -8.360783576965332, |
|
"rewards/margins": 17.201366424560547, |
|
"rewards/rejected": -25.562149047851562, |
|
"semantic_entropy": 0.8203535079956055, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"grad_norm": 50.371762692382795, |
|
"learning_rate": 7.155641507955445e-10, |
|
"logits/chosen": -1.2078804969787598, |
|
"logits/rejected": -1.1214892864227295, |
|
"logps/chosen": -0.6584422588348389, |
|
"logps/rejected": -2.1391983032226562, |
|
"loss": 1.026, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.5844221115112305, |
|
"rewards/margins": 14.807560920715332, |
|
"rewards/rejected": -21.391983032226562, |
|
"semantic_entropy": 0.8509858250617981, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9781420765027322, |
|
"grad_norm": 55.00348286428405, |
|
"learning_rate": 2.957592373452056e-10, |
|
"logits/chosen": -1.2071561813354492, |
|
"logits/rejected": -1.1362513303756714, |
|
"logps/chosen": -0.719018280506134, |
|
"logps/rejected": -2.406873941421509, |
|
"loss": 0.9953, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -7.190183162689209, |
|
"rewards/margins": 16.87855339050293, |
|
"rewards/rejected": -24.068737030029297, |
|
"semantic_entropy": 0.8274633288383484, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.989071038251366, |
|
"grad_norm": 43.167771529061575, |
|
"learning_rate": 5.842620032053824e-11, |
|
"logits/chosen": -1.2589218616485596, |
|
"logits/rejected": -1.189516544342041, |
|
"logps/chosen": -0.7029792666435242, |
|
"logps/rejected": -2.2207939624786377, |
|
"loss": 0.9075, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.029792785644531, |
|
"rewards/margins": 15.178146362304688, |
|
"rewards/rejected": -22.20793914794922, |
|
"semantic_entropy": 0.8508146405220032, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.9978142076502732, |
|
"step": 914, |
|
"total_flos": 0.0, |
|
"train_loss": 1.6402891297830795, |
|
"train_runtime": 11806.3913, |
|
"train_samples_per_second": 9.92, |
|
"train_steps_per_second": 0.077 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 914, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|