|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 711, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10548523206751055, |
|
"grad_norm": 30.46313288029562, |
|
"learning_rate": 3.472222222222222e-07, |
|
"log_odds_chosen": 0.16534051299095154, |
|
"log_odds_ratio": -0.6545013189315796, |
|
"logits/chosen": -0.41492316126823425, |
|
"logits/rejected": -0.435069739818573, |
|
"logps/chosen": -0.9887133836746216, |
|
"logps/rejected": -1.1123149394989014, |
|
"loss": 3.875, |
|
"nll_loss": 3.5094263553619385, |
|
"rewards/accuracies": 0.4300000071525574, |
|
"rewards/chosen": -0.09887134283781052, |
|
"rewards/margins": 0.012360147200524807, |
|
"rewards/rejected": -0.1112314984202385, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2109704641350211, |
|
"grad_norm": 74.88568859723209, |
|
"learning_rate": 6.944444444444444e-07, |
|
"log_odds_chosen": 0.10536957532167435, |
|
"log_odds_ratio": -0.6801052689552307, |
|
"logits/chosen": -0.9151662588119507, |
|
"logits/rejected": -0.9367119073867798, |
|
"logps/chosen": -1.117153286933899, |
|
"logps/rejected": -1.173445463180542, |
|
"loss": 0.8198, |
|
"nll_loss": 0.7042462229728699, |
|
"rewards/accuracies": 0.5299999713897705, |
|
"rewards/chosen": -0.11171531677246094, |
|
"rewards/margins": 0.005629217717796564, |
|
"rewards/rejected": -0.11734454333782196, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.31645569620253167, |
|
"grad_norm": 4.304125765256844, |
|
"learning_rate": 9.953051643192487e-07, |
|
"log_odds_chosen": 0.1444670855998993, |
|
"log_odds_ratio": -0.6669880747795105, |
|
"logits/chosen": -0.8349351286888123, |
|
"logits/rejected": -0.8575166463851929, |
|
"logps/chosen": -0.9019416570663452, |
|
"logps/rejected": -0.9740175604820251, |
|
"loss": 0.7274, |
|
"nll_loss": 0.6248427033424377, |
|
"rewards/accuracies": 0.4399999976158142, |
|
"rewards/chosen": -0.09019416570663452, |
|
"rewards/margins": 0.007207601796835661, |
|
"rewards/rejected": -0.09740178287029266, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4219409282700422, |
|
"grad_norm": 3.3433596753401935, |
|
"learning_rate": 9.561815336463224e-07, |
|
"log_odds_chosen": 0.18847505748271942, |
|
"log_odds_ratio": -0.6345129609107971, |
|
"logits/chosen": -0.8627865314483643, |
|
"logits/rejected": -0.8871102333068848, |
|
"logps/chosen": -0.774340033531189, |
|
"logps/rejected": -0.8760174512863159, |
|
"loss": 0.6753, |
|
"nll_loss": 0.6059034466743469, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0774340033531189, |
|
"rewards/margins": 0.010167734697461128, |
|
"rewards/rejected": -0.08760173618793488, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5274261603375527, |
|
"grad_norm": 6.317691511765167, |
|
"learning_rate": 9.170579029733959e-07, |
|
"log_odds_chosen": 0.18306466937065125, |
|
"log_odds_ratio": -0.6469370126724243, |
|
"logits/chosen": -0.8607066869735718, |
|
"logits/rejected": -0.8643490672111511, |
|
"logps/chosen": -0.5890992283821106, |
|
"logps/rejected": -0.6761559247970581, |
|
"loss": 0.6409, |
|
"nll_loss": 0.6029787659645081, |
|
"rewards/accuracies": 0.4699999988079071, |
|
"rewards/chosen": -0.05890992656350136, |
|
"rewards/margins": 0.00870567373931408, |
|
"rewards/rejected": -0.06761559844017029, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6329113924050633, |
|
"grad_norm": 5.056963941311914, |
|
"learning_rate": 8.779342723004695e-07, |
|
"log_odds_chosen": 0.22126027941703796, |
|
"log_odds_ratio": -0.6363757848739624, |
|
"logits/chosen": -0.9035418629646301, |
|
"logits/rejected": -0.9076377749443054, |
|
"logps/chosen": -0.5781998634338379, |
|
"logps/rejected": -0.6600324511528015, |
|
"loss": 0.6416, |
|
"nll_loss": 0.5581729412078857, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05781999230384827, |
|
"rewards/margins": 0.008183254860341549, |
|
"rewards/rejected": -0.06600324809551239, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7383966244725738, |
|
"grad_norm": 3.205932624220109, |
|
"learning_rate": 8.38810641627543e-07, |
|
"log_odds_chosen": 0.3526044189929962, |
|
"log_odds_ratio": -0.5982345342636108, |
|
"logits/chosen": -0.8294059634208679, |
|
"logits/rejected": -0.8529558777809143, |
|
"logps/chosen": -0.5301632881164551, |
|
"logps/rejected": -0.661786675453186, |
|
"loss": 0.5897, |
|
"nll_loss": 0.5430833697319031, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05301632732152939, |
|
"rewards/margins": 0.013162333518266678, |
|
"rewards/rejected": -0.06617865711450577, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.8438818565400844, |
|
"grad_norm": 2.682686637504157, |
|
"learning_rate": 7.996870109546165e-07, |
|
"log_odds_chosen": 0.2721116840839386, |
|
"log_odds_ratio": -0.6195984482765198, |
|
"logits/chosen": -0.8821828365325928, |
|
"logits/rejected": -0.9078083634376526, |
|
"logps/chosen": -0.5987895131111145, |
|
"logps/rejected": -0.757358193397522, |
|
"loss": 0.6351, |
|
"nll_loss": 0.5751867890357971, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05987895652651787, |
|
"rewards/margins": 0.01585685834288597, |
|
"rewards/rejected": -0.07573581486940384, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9493670886075949, |
|
"grad_norm": 1.943018301402564, |
|
"learning_rate": 7.605633802816901e-07, |
|
"log_odds_chosen": 0.19566155970096588, |
|
"log_odds_ratio": -0.6696605682373047, |
|
"logits/chosen": -0.8021306395530701, |
|
"logits/rejected": -0.8081836104393005, |
|
"logps/chosen": -0.509245753288269, |
|
"logps/rejected": -0.6033316254615784, |
|
"loss": 0.6222, |
|
"nll_loss": 0.5073065161705017, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.05092458426952362, |
|
"rewards/margins": 0.00940858107060194, |
|
"rewards/rejected": -0.06033316254615784, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.0548523206751055, |
|
"grad_norm": 1.7595986013614073, |
|
"learning_rate": 7.214397496087636e-07, |
|
"log_odds_chosen": 0.3115460276603699, |
|
"log_odds_ratio": -0.6099376678466797, |
|
"logits/chosen": -0.8336898684501648, |
|
"logits/rejected": -0.8424633145332336, |
|
"logps/chosen": -0.5518218874931335, |
|
"logps/rejected": -0.6624475717544556, |
|
"loss": 0.599, |
|
"nll_loss": 0.506114661693573, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.055182188749313354, |
|
"rewards/margins": 0.011062567122280598, |
|
"rewards/rejected": -0.06624475121498108, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.160337552742616, |
|
"grad_norm": 2.131980857522191, |
|
"learning_rate": 6.823161189358372e-07, |
|
"log_odds_chosen": 0.3637891113758087, |
|
"log_odds_ratio": -0.598861575126648, |
|
"logits/chosen": -0.8110294342041016, |
|
"logits/rejected": -0.8618900775909424, |
|
"logps/chosen": -0.5288816690444946, |
|
"logps/rejected": -0.7046244144439697, |
|
"loss": 0.6147, |
|
"nll_loss": 0.5313621163368225, |
|
"rewards/accuracies": 0.6100000143051147, |
|
"rewards/chosen": -0.0528881661593914, |
|
"rewards/margins": 0.017574286088347435, |
|
"rewards/rejected": -0.07046245038509369, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.2658227848101267, |
|
"grad_norm": 1.4186554490026797, |
|
"learning_rate": 6.431924882629108e-07, |
|
"log_odds_chosen": 0.3448210060596466, |
|
"log_odds_ratio": -0.590344250202179, |
|
"logits/chosen": -0.8463892340660095, |
|
"logits/rejected": -0.8793061971664429, |
|
"logps/chosen": -0.5530957579612732, |
|
"logps/rejected": -0.7298399209976196, |
|
"loss": 0.5784, |
|
"nll_loss": 0.5125244855880737, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -0.05530957877635956, |
|
"rewards/margins": 0.017674420028924942, |
|
"rewards/rejected": -0.0729840025305748, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.371308016877637, |
|
"grad_norm": 1.5618238810197242, |
|
"learning_rate": 6.040688575899842e-07, |
|
"log_odds_chosen": 0.3834618330001831, |
|
"log_odds_ratio": -0.5801408290863037, |
|
"logits/chosen": -0.7922927141189575, |
|
"logits/rejected": -0.807830274105072, |
|
"logps/chosen": -0.5033449530601501, |
|
"logps/rejected": -0.6853150725364685, |
|
"loss": 0.6035, |
|
"nll_loss": 0.5220791697502136, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05033449828624725, |
|
"rewards/margins": 0.018197014927864075, |
|
"rewards/rejected": -0.06853151321411133, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.4767932489451476, |
|
"grad_norm": 1.6694346253469523, |
|
"learning_rate": 5.649452269170579e-07, |
|
"log_odds_chosen": 0.23677273094654083, |
|
"log_odds_ratio": -0.6395828127861023, |
|
"logits/chosen": -0.7438558340072632, |
|
"logits/rejected": -0.76462721824646, |
|
"logps/chosen": -0.5047862529754639, |
|
"logps/rejected": -0.5797430276870728, |
|
"loss": 0.5862, |
|
"nll_loss": 0.5142392516136169, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.05047862231731415, |
|
"rewards/margins": 0.007495685946196318, |
|
"rewards/rejected": -0.05797431617975235, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.5822784810126582, |
|
"grad_norm": 1.6654016970385481, |
|
"learning_rate": 5.258215962441315e-07, |
|
"log_odds_chosen": 0.3365418314933777, |
|
"log_odds_ratio": -0.5955182313919067, |
|
"logits/chosen": -0.761318564414978, |
|
"logits/rejected": -0.7731548547744751, |
|
"logps/chosen": -0.49533072113990784, |
|
"logps/rejected": -0.6420021057128906, |
|
"loss": 0.5995, |
|
"nll_loss": 0.5266813039779663, |
|
"rewards/accuracies": 0.5400000214576721, |
|
"rewards/chosen": -0.04953307658433914, |
|
"rewards/margins": 0.014667129144072533, |
|
"rewards/rejected": -0.06420020759105682, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.6877637130801688, |
|
"grad_norm": 1.4385786677568302, |
|
"learning_rate": 4.86697965571205e-07, |
|
"log_odds_chosen": 0.27425241470336914, |
|
"log_odds_ratio": -0.6355130076408386, |
|
"logits/chosen": -0.7875126600265503, |
|
"logits/rejected": -0.7872836589813232, |
|
"logps/chosen": -0.5322539806365967, |
|
"logps/rejected": -0.6498347520828247, |
|
"loss": 0.5813, |
|
"nll_loss": 0.5109381675720215, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.05322539806365967, |
|
"rewards/margins": 0.011758077889680862, |
|
"rewards/rejected": -0.06498347222805023, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7932489451476794, |
|
"grad_norm": 1.3133499382164198, |
|
"learning_rate": 4.475743348982786e-07, |
|
"log_odds_chosen": 0.4066045880317688, |
|
"log_odds_ratio": -0.5856396555900574, |
|
"logits/chosen": -0.8323702812194824, |
|
"logits/rejected": -0.8558816313743591, |
|
"logps/chosen": -0.5287883281707764, |
|
"logps/rejected": -0.7097746133804321, |
|
"loss": 0.5983, |
|
"nll_loss": 0.5335114002227783, |
|
"rewards/accuracies": 0.5600000023841858, |
|
"rewards/chosen": -0.052878838032484055, |
|
"rewards/margins": 0.018098626285791397, |
|
"rewards/rejected": -0.07097746431827545, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.8987341772151898, |
|
"grad_norm": 1.473901535326238, |
|
"learning_rate": 4.084507042253521e-07, |
|
"log_odds_chosen": 0.41008836030960083, |
|
"log_odds_ratio": -0.5757314562797546, |
|
"logits/chosen": -0.8004408478736877, |
|
"logits/rejected": -0.8020641207695007, |
|
"logps/chosen": -0.550152599811554, |
|
"logps/rejected": -0.7004598379135132, |
|
"loss": 0.586, |
|
"nll_loss": 0.5043174028396606, |
|
"rewards/accuracies": 0.5899999737739563, |
|
"rewards/chosen": -0.055015262216329575, |
|
"rewards/margins": 0.015030724927783012, |
|
"rewards/rejected": -0.07004599273204803, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.0042194092827006, |
|
"grad_norm": 1.2607531968315342, |
|
"learning_rate": 3.6932707355242563e-07, |
|
"log_odds_chosen": 0.2668948769569397, |
|
"log_odds_ratio": -0.6350302696228027, |
|
"logits/chosen": -0.7956358194351196, |
|
"logits/rejected": -0.7938474416732788, |
|
"logps/chosen": -0.5379875898361206, |
|
"logps/rejected": -0.6204279065132141, |
|
"loss": 0.5926, |
|
"nll_loss": 0.529137372970581, |
|
"rewards/accuracies": 0.5299999713897705, |
|
"rewards/chosen": -0.05379876494407654, |
|
"rewards/margins": 0.008244026452302933, |
|
"rewards/rejected": -0.06204278767108917, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.109704641350211, |
|
"grad_norm": 1.3082002649896622, |
|
"learning_rate": 3.302034428794992e-07, |
|
"log_odds_chosen": 0.35187309980392456, |
|
"log_odds_ratio": -0.5857384204864502, |
|
"logits/chosen": -0.7866548299789429, |
|
"logits/rejected": -0.8024720549583435, |
|
"logps/chosen": -0.4700208902359009, |
|
"logps/rejected": -0.6245195865631104, |
|
"loss": 0.579, |
|
"nll_loss": 0.506442129611969, |
|
"rewards/accuracies": 0.6299999952316284, |
|
"rewards/chosen": -0.047002095729112625, |
|
"rewards/margins": 0.015449865721166134, |
|
"rewards/rejected": -0.06245195493102074, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.2151898734177213, |
|
"grad_norm": 1.3044699084998892, |
|
"learning_rate": 2.9107981220657274e-07, |
|
"log_odds_chosen": 0.2667557895183563, |
|
"log_odds_ratio": -0.6398404836654663, |
|
"logits/chosen": -0.7439711689949036, |
|
"logits/rejected": -0.7557228207588196, |
|
"logps/chosen": -0.5503268837928772, |
|
"logps/rejected": -0.6489098072052002, |
|
"loss": 0.5758, |
|
"nll_loss": 0.5791399478912354, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.05503269284963608, |
|
"rewards/margins": 0.009858297184109688, |
|
"rewards/rejected": -0.06489098817110062, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.320675105485232, |
|
"grad_norm": 1.2737811980928775, |
|
"learning_rate": 2.5195618153364627e-07, |
|
"log_odds_chosen": 0.27772146463394165, |
|
"log_odds_ratio": -0.6127716898918152, |
|
"logits/chosen": -0.7682886719703674, |
|
"logits/rejected": -0.7594354152679443, |
|
"logps/chosen": -0.5556504726409912, |
|
"logps/rejected": -0.6759030222892761, |
|
"loss": 0.5782, |
|
"nll_loss": 0.5577492713928223, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.05556504800915718, |
|
"rewards/margins": 0.012025254778563976, |
|
"rewards/rejected": -0.06759029626846313, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.4261603375527425, |
|
"grad_norm": 1.2627851122190357, |
|
"learning_rate": 2.1283255086071985e-07, |
|
"log_odds_chosen": 0.49636557698249817, |
|
"log_odds_ratio": -0.5460684895515442, |
|
"logits/chosen": -0.7931969165802002, |
|
"logits/rejected": -0.8067951798439026, |
|
"logps/chosen": -0.5168901085853577, |
|
"logps/rejected": -0.7222075462341309, |
|
"loss": 0.56, |
|
"nll_loss": 0.496804803609848, |
|
"rewards/accuracies": 0.5699999928474426, |
|
"rewards/chosen": -0.05168901011347771, |
|
"rewards/margins": 0.02053174003958702, |
|
"rewards/rejected": -0.07222075760364532, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.5316455696202533, |
|
"grad_norm": 1.5232359717633108, |
|
"learning_rate": 1.7370892018779344e-07, |
|
"log_odds_chosen": 0.41104409098625183, |
|
"log_odds_ratio": -0.5702406167984009, |
|
"logits/chosen": -0.7789013385772705, |
|
"logits/rejected": -0.793701171875, |
|
"logps/chosen": -0.4571213126182556, |
|
"logps/rejected": -0.6431244015693665, |
|
"loss": 0.5704, |
|
"nll_loss": 0.4690850079059601, |
|
"rewards/accuracies": 0.5799999833106995, |
|
"rewards/chosen": -0.04571213200688362, |
|
"rewards/margins": 0.018600303679704666, |
|
"rewards/rejected": -0.06431242823600769, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.6371308016877637, |
|
"grad_norm": 1.4542420899735284, |
|
"learning_rate": 1.3458528951486697e-07, |
|
"log_odds_chosen": 0.20925407111644745, |
|
"log_odds_ratio": -0.659648597240448, |
|
"logits/chosen": -0.6941782236099243, |
|
"logits/rejected": -0.710488498210907, |
|
"logps/chosen": -0.5375428795814514, |
|
"logps/rejected": -0.6434080600738525, |
|
"loss": 0.5868, |
|
"nll_loss": 0.5830110311508179, |
|
"rewards/accuracies": 0.38999998569488525, |
|
"rewards/chosen": -0.0537542924284935, |
|
"rewards/margins": 0.01058651227504015, |
|
"rewards/rejected": -0.06434080749750137, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.742616033755274, |
|
"grad_norm": 1.231391960908516, |
|
"learning_rate": 9.546165884194053e-08, |
|
"log_odds_chosen": 0.3515250086784363, |
|
"log_odds_ratio": -0.5891639590263367, |
|
"logits/chosen": -0.749308168888092, |
|
"logits/rejected": -0.7614193558692932, |
|
"logps/chosen": -0.5068987011909485, |
|
"logps/rejected": -0.6622204780578613, |
|
"loss": 0.5909, |
|
"nll_loss": 0.5026033520698547, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.05068986862897873, |
|
"rewards/margins": 0.015532179735600948, |
|
"rewards/rejected": -0.06622204929590225, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.848101265822785, |
|
"grad_norm": 1.3722555215938899, |
|
"learning_rate": 5.633802816901408e-08, |
|
"log_odds_chosen": 0.18586792051792145, |
|
"log_odds_ratio": -0.6605378985404968, |
|
"logits/chosen": -0.8417736291885376, |
|
"logits/rejected": -0.8345857858657837, |
|
"logps/chosen": -0.5810900926589966, |
|
"logps/rejected": -0.6728335022926331, |
|
"loss": 0.5815, |
|
"nll_loss": 0.5025544166564941, |
|
"rewards/accuracies": 0.5099999904632568, |
|
"rewards/chosen": -0.05810901150107384, |
|
"rewards/margins": 0.009174343198537827, |
|
"rewards/rejected": -0.06728334724903107, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.9535864978902953, |
|
"grad_norm": 1.9284456011672904, |
|
"learning_rate": 1.7214397496087635e-08, |
|
"log_odds_chosen": 0.1927904486656189, |
|
"log_odds_ratio": -0.6649494171142578, |
|
"logits/chosen": -0.678924024105072, |
|
"logits/rejected": -0.6770960092544556, |
|
"logps/chosen": -0.5262628793716431, |
|
"logps/rejected": -0.580658495426178, |
|
"loss": 0.6001, |
|
"nll_loss": 0.5695458650588989, |
|
"rewards/accuracies": 0.4099999964237213, |
|
"rewards/chosen": -0.05262628570199013, |
|
"rewards/margins": 0.005439565982669592, |
|
"rewards/rejected": -0.05806584656238556, |
|
"step": 700 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 711, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|