abhishek's picture
abhishek HF staff
Upload folder using huggingface_hub
2e1df21 verified
raw
history blame
19.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 711,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10548523206751055,
"grad_norm": 30.46313288029562,
"learning_rate": 3.472222222222222e-07,
"log_odds_chosen": 0.16534051299095154,
"log_odds_ratio": -0.6545013189315796,
"logits/chosen": -0.41492316126823425,
"logits/rejected": -0.435069739818573,
"logps/chosen": -0.9887133836746216,
"logps/rejected": -1.1123149394989014,
"loss": 3.875,
"nll_loss": 3.5094263553619385,
"rewards/accuracies": 0.4300000071525574,
"rewards/chosen": -0.09887134283781052,
"rewards/margins": 0.012360147200524807,
"rewards/rejected": -0.1112314984202385,
"step": 25
},
{
"epoch": 0.2109704641350211,
"grad_norm": 74.88568859723209,
"learning_rate": 6.944444444444444e-07,
"log_odds_chosen": 0.10536957532167435,
"log_odds_ratio": -0.6801052689552307,
"logits/chosen": -0.9151662588119507,
"logits/rejected": -0.9367119073867798,
"logps/chosen": -1.117153286933899,
"logps/rejected": -1.173445463180542,
"loss": 0.8198,
"nll_loss": 0.7042462229728699,
"rewards/accuracies": 0.5299999713897705,
"rewards/chosen": -0.11171531677246094,
"rewards/margins": 0.005629217717796564,
"rewards/rejected": -0.11734454333782196,
"step": 50
},
{
"epoch": 0.31645569620253167,
"grad_norm": 4.304125765256844,
"learning_rate": 9.953051643192487e-07,
"log_odds_chosen": 0.1444670855998993,
"log_odds_ratio": -0.6669880747795105,
"logits/chosen": -0.8349351286888123,
"logits/rejected": -0.8575166463851929,
"logps/chosen": -0.9019416570663452,
"logps/rejected": -0.9740175604820251,
"loss": 0.7274,
"nll_loss": 0.6248427033424377,
"rewards/accuracies": 0.4399999976158142,
"rewards/chosen": -0.09019416570663452,
"rewards/margins": 0.007207601796835661,
"rewards/rejected": -0.09740178287029266,
"step": 75
},
{
"epoch": 0.4219409282700422,
"grad_norm": 3.3433596753401935,
"learning_rate": 9.561815336463224e-07,
"log_odds_chosen": 0.18847505748271942,
"log_odds_ratio": -0.6345129609107971,
"logits/chosen": -0.8627865314483643,
"logits/rejected": -0.8871102333068848,
"logps/chosen": -0.774340033531189,
"logps/rejected": -0.8760174512863159,
"loss": 0.6753,
"nll_loss": 0.6059034466743469,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0774340033531189,
"rewards/margins": 0.010167734697461128,
"rewards/rejected": -0.08760173618793488,
"step": 100
},
{
"epoch": 0.5274261603375527,
"grad_norm": 6.317691511765167,
"learning_rate": 9.170579029733959e-07,
"log_odds_chosen": 0.18306466937065125,
"log_odds_ratio": -0.6469370126724243,
"logits/chosen": -0.8607066869735718,
"logits/rejected": -0.8643490672111511,
"logps/chosen": -0.5890992283821106,
"logps/rejected": -0.6761559247970581,
"loss": 0.6409,
"nll_loss": 0.6029787659645081,
"rewards/accuracies": 0.4699999988079071,
"rewards/chosen": -0.05890992656350136,
"rewards/margins": 0.00870567373931408,
"rewards/rejected": -0.06761559844017029,
"step": 125
},
{
"epoch": 0.6329113924050633,
"grad_norm": 5.056963941311914,
"learning_rate": 8.779342723004695e-07,
"log_odds_chosen": 0.22126027941703796,
"log_odds_ratio": -0.6363757848739624,
"logits/chosen": -0.9035418629646301,
"logits/rejected": -0.9076377749443054,
"logps/chosen": -0.5781998634338379,
"logps/rejected": -0.6600324511528015,
"loss": 0.6416,
"nll_loss": 0.5581729412078857,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.05781999230384827,
"rewards/margins": 0.008183254860341549,
"rewards/rejected": -0.06600324809551239,
"step": 150
},
{
"epoch": 0.7383966244725738,
"grad_norm": 3.205932624220109,
"learning_rate": 8.38810641627543e-07,
"log_odds_chosen": 0.3526044189929962,
"log_odds_ratio": -0.5982345342636108,
"logits/chosen": -0.8294059634208679,
"logits/rejected": -0.8529558777809143,
"logps/chosen": -0.5301632881164551,
"logps/rejected": -0.661786675453186,
"loss": 0.5897,
"nll_loss": 0.5430833697319031,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.05301632732152939,
"rewards/margins": 0.013162333518266678,
"rewards/rejected": -0.06617865711450577,
"step": 175
},
{
"epoch": 0.8438818565400844,
"grad_norm": 2.682686637504157,
"learning_rate": 7.996870109546165e-07,
"log_odds_chosen": 0.2721116840839386,
"log_odds_ratio": -0.6195984482765198,
"logits/chosen": -0.8821828365325928,
"logits/rejected": -0.9078083634376526,
"logps/chosen": -0.5987895131111145,
"logps/rejected": -0.757358193397522,
"loss": 0.6351,
"nll_loss": 0.5751867890357971,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.05987895652651787,
"rewards/margins": 0.01585685834288597,
"rewards/rejected": -0.07573581486940384,
"step": 200
},
{
"epoch": 0.9493670886075949,
"grad_norm": 1.943018301402564,
"learning_rate": 7.605633802816901e-07,
"log_odds_chosen": 0.19566155970096588,
"log_odds_ratio": -0.6696605682373047,
"logits/chosen": -0.8021306395530701,
"logits/rejected": -0.8081836104393005,
"logps/chosen": -0.509245753288269,
"logps/rejected": -0.6033316254615784,
"loss": 0.6222,
"nll_loss": 0.5073065161705017,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.05092458426952362,
"rewards/margins": 0.00940858107060194,
"rewards/rejected": -0.06033316254615784,
"step": 225
},
{
"epoch": 1.0548523206751055,
"grad_norm": 1.7595986013614073,
"learning_rate": 7.214397496087636e-07,
"log_odds_chosen": 0.3115460276603699,
"log_odds_ratio": -0.6099376678466797,
"logits/chosen": -0.8336898684501648,
"logits/rejected": -0.8424633145332336,
"logps/chosen": -0.5518218874931335,
"logps/rejected": -0.6624475717544556,
"loss": 0.599,
"nll_loss": 0.506114661693573,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.055182188749313354,
"rewards/margins": 0.011062567122280598,
"rewards/rejected": -0.06624475121498108,
"step": 250
},
{
"epoch": 1.160337552742616,
"grad_norm": 2.131980857522191,
"learning_rate": 6.823161189358372e-07,
"log_odds_chosen": 0.3637891113758087,
"log_odds_ratio": -0.598861575126648,
"logits/chosen": -0.8110294342041016,
"logits/rejected": -0.8618900775909424,
"logps/chosen": -0.5288816690444946,
"logps/rejected": -0.7046244144439697,
"loss": 0.6147,
"nll_loss": 0.5313621163368225,
"rewards/accuracies": 0.6100000143051147,
"rewards/chosen": -0.0528881661593914,
"rewards/margins": 0.017574286088347435,
"rewards/rejected": -0.07046245038509369,
"step": 275
},
{
"epoch": 1.2658227848101267,
"grad_norm": 1.4186554490026797,
"learning_rate": 6.431924882629108e-07,
"log_odds_chosen": 0.3448210060596466,
"log_odds_ratio": -0.590344250202179,
"logits/chosen": -0.8463892340660095,
"logits/rejected": -0.8793061971664429,
"logps/chosen": -0.5530957579612732,
"logps/rejected": -0.7298399209976196,
"loss": 0.5784,
"nll_loss": 0.5125244855880737,
"rewards/accuracies": 0.5600000023841858,
"rewards/chosen": -0.05530957877635956,
"rewards/margins": 0.017674420028924942,
"rewards/rejected": -0.0729840025305748,
"step": 300
},
{
"epoch": 1.371308016877637,
"grad_norm": 1.5618238810197242,
"learning_rate": 6.040688575899842e-07,
"log_odds_chosen": 0.3834618330001831,
"log_odds_ratio": -0.5801408290863037,
"logits/chosen": -0.7922927141189575,
"logits/rejected": -0.807830274105072,
"logps/chosen": -0.5033449530601501,
"logps/rejected": -0.6853150725364685,
"loss": 0.6035,
"nll_loss": 0.5220791697502136,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.05033449828624725,
"rewards/margins": 0.018197014927864075,
"rewards/rejected": -0.06853151321411133,
"step": 325
},
{
"epoch": 1.4767932489451476,
"grad_norm": 1.6694346253469523,
"learning_rate": 5.649452269170579e-07,
"log_odds_chosen": 0.23677273094654083,
"log_odds_ratio": -0.6395828127861023,
"logits/chosen": -0.7438558340072632,
"logits/rejected": -0.76462721824646,
"logps/chosen": -0.5047862529754639,
"logps/rejected": -0.5797430276870728,
"loss": 0.5862,
"nll_loss": 0.5142392516136169,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.05047862231731415,
"rewards/margins": 0.007495685946196318,
"rewards/rejected": -0.05797431617975235,
"step": 350
},
{
"epoch": 1.5822784810126582,
"grad_norm": 1.6654016970385481,
"learning_rate": 5.258215962441315e-07,
"log_odds_chosen": 0.3365418314933777,
"log_odds_ratio": -0.5955182313919067,
"logits/chosen": -0.761318564414978,
"logits/rejected": -0.7731548547744751,
"logps/chosen": -0.49533072113990784,
"logps/rejected": -0.6420021057128906,
"loss": 0.5995,
"nll_loss": 0.5266813039779663,
"rewards/accuracies": 0.5400000214576721,
"rewards/chosen": -0.04953307658433914,
"rewards/margins": 0.014667129144072533,
"rewards/rejected": -0.06420020759105682,
"step": 375
},
{
"epoch": 1.6877637130801688,
"grad_norm": 1.4385786677568302,
"learning_rate": 4.86697965571205e-07,
"log_odds_chosen": 0.27425241470336914,
"log_odds_ratio": -0.6355130076408386,
"logits/chosen": -0.7875126600265503,
"logits/rejected": -0.7872836589813232,
"logps/chosen": -0.5322539806365967,
"logps/rejected": -0.6498347520828247,
"loss": 0.5813,
"nll_loss": 0.5109381675720215,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.05322539806365967,
"rewards/margins": 0.011758077889680862,
"rewards/rejected": -0.06498347222805023,
"step": 400
},
{
"epoch": 1.7932489451476794,
"grad_norm": 1.3133499382164198,
"learning_rate": 4.475743348982786e-07,
"log_odds_chosen": 0.4066045880317688,
"log_odds_ratio": -0.5856396555900574,
"logits/chosen": -0.8323702812194824,
"logits/rejected": -0.8558816313743591,
"logps/chosen": -0.5287883281707764,
"logps/rejected": -0.7097746133804321,
"loss": 0.5983,
"nll_loss": 0.5335114002227783,
"rewards/accuracies": 0.5600000023841858,
"rewards/chosen": -0.052878838032484055,
"rewards/margins": 0.018098626285791397,
"rewards/rejected": -0.07097746431827545,
"step": 425
},
{
"epoch": 1.8987341772151898,
"grad_norm": 1.473901535326238,
"learning_rate": 4.084507042253521e-07,
"log_odds_chosen": 0.41008836030960083,
"log_odds_ratio": -0.5757314562797546,
"logits/chosen": -0.8004408478736877,
"logits/rejected": -0.8020641207695007,
"logps/chosen": -0.550152599811554,
"logps/rejected": -0.7004598379135132,
"loss": 0.586,
"nll_loss": 0.5043174028396606,
"rewards/accuracies": 0.5899999737739563,
"rewards/chosen": -0.055015262216329575,
"rewards/margins": 0.015030724927783012,
"rewards/rejected": -0.07004599273204803,
"step": 450
},
{
"epoch": 2.0042194092827006,
"grad_norm": 1.2607531968315342,
"learning_rate": 3.6932707355242563e-07,
"log_odds_chosen": 0.2668948769569397,
"log_odds_ratio": -0.6350302696228027,
"logits/chosen": -0.7956358194351196,
"logits/rejected": -0.7938474416732788,
"logps/chosen": -0.5379875898361206,
"logps/rejected": -0.6204279065132141,
"loss": 0.5926,
"nll_loss": 0.529137372970581,
"rewards/accuracies": 0.5299999713897705,
"rewards/chosen": -0.05379876494407654,
"rewards/margins": 0.008244026452302933,
"rewards/rejected": -0.06204278767108917,
"step": 475
},
{
"epoch": 2.109704641350211,
"grad_norm": 1.3082002649896622,
"learning_rate": 3.302034428794992e-07,
"log_odds_chosen": 0.35187309980392456,
"log_odds_ratio": -0.5857384204864502,
"logits/chosen": -0.7866548299789429,
"logits/rejected": -0.8024720549583435,
"logps/chosen": -0.4700208902359009,
"logps/rejected": -0.6245195865631104,
"loss": 0.579,
"nll_loss": 0.506442129611969,
"rewards/accuracies": 0.6299999952316284,
"rewards/chosen": -0.047002095729112625,
"rewards/margins": 0.015449865721166134,
"rewards/rejected": -0.06245195493102074,
"step": 500
},
{
"epoch": 2.2151898734177213,
"grad_norm": 1.3044699084998892,
"learning_rate": 2.9107981220657274e-07,
"log_odds_chosen": 0.2667557895183563,
"log_odds_ratio": -0.6398404836654663,
"logits/chosen": -0.7439711689949036,
"logits/rejected": -0.7557228207588196,
"logps/chosen": -0.5503268837928772,
"logps/rejected": -0.6489098072052002,
"loss": 0.5758,
"nll_loss": 0.5791399478912354,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.05503269284963608,
"rewards/margins": 0.009858297184109688,
"rewards/rejected": -0.06489098817110062,
"step": 525
},
{
"epoch": 2.320675105485232,
"grad_norm": 1.2737811980928775,
"learning_rate": 2.5195618153364627e-07,
"log_odds_chosen": 0.27772146463394165,
"log_odds_ratio": -0.6127716898918152,
"logits/chosen": -0.7682886719703674,
"logits/rejected": -0.7594354152679443,
"logps/chosen": -0.5556504726409912,
"logps/rejected": -0.6759030222892761,
"loss": 0.5782,
"nll_loss": 0.5577492713928223,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.05556504800915718,
"rewards/margins": 0.012025254778563976,
"rewards/rejected": -0.06759029626846313,
"step": 550
},
{
"epoch": 2.4261603375527425,
"grad_norm": 1.2627851122190357,
"learning_rate": 2.1283255086071985e-07,
"log_odds_chosen": 0.49636557698249817,
"log_odds_ratio": -0.5460684895515442,
"logits/chosen": -0.7931969165802002,
"logits/rejected": -0.8067951798439026,
"logps/chosen": -0.5168901085853577,
"logps/rejected": -0.7222075462341309,
"loss": 0.56,
"nll_loss": 0.496804803609848,
"rewards/accuracies": 0.5699999928474426,
"rewards/chosen": -0.05168901011347771,
"rewards/margins": 0.02053174003958702,
"rewards/rejected": -0.07222075760364532,
"step": 575
},
{
"epoch": 2.5316455696202533,
"grad_norm": 1.5232359717633108,
"learning_rate": 1.7370892018779344e-07,
"log_odds_chosen": 0.41104409098625183,
"log_odds_ratio": -0.5702406167984009,
"logits/chosen": -0.7789013385772705,
"logits/rejected": -0.793701171875,
"logps/chosen": -0.4571213126182556,
"logps/rejected": -0.6431244015693665,
"loss": 0.5704,
"nll_loss": 0.4690850079059601,
"rewards/accuracies": 0.5799999833106995,
"rewards/chosen": -0.04571213200688362,
"rewards/margins": 0.018600303679704666,
"rewards/rejected": -0.06431242823600769,
"step": 600
},
{
"epoch": 2.6371308016877637,
"grad_norm": 1.4542420899735284,
"learning_rate": 1.3458528951486697e-07,
"log_odds_chosen": 0.20925407111644745,
"log_odds_ratio": -0.659648597240448,
"logits/chosen": -0.6941782236099243,
"logits/rejected": -0.710488498210907,
"logps/chosen": -0.5375428795814514,
"logps/rejected": -0.6434080600738525,
"loss": 0.5868,
"nll_loss": 0.5830110311508179,
"rewards/accuracies": 0.38999998569488525,
"rewards/chosen": -0.0537542924284935,
"rewards/margins": 0.01058651227504015,
"rewards/rejected": -0.06434080749750137,
"step": 625
},
{
"epoch": 2.742616033755274,
"grad_norm": 1.231391960908516,
"learning_rate": 9.546165884194053e-08,
"log_odds_chosen": 0.3515250086784363,
"log_odds_ratio": -0.5891639590263367,
"logits/chosen": -0.749308168888092,
"logits/rejected": -0.7614193558692932,
"logps/chosen": -0.5068987011909485,
"logps/rejected": -0.6622204780578613,
"loss": 0.5909,
"nll_loss": 0.5026033520698547,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.05068986862897873,
"rewards/margins": 0.015532179735600948,
"rewards/rejected": -0.06622204929590225,
"step": 650
},
{
"epoch": 2.848101265822785,
"grad_norm": 1.3722555215938899,
"learning_rate": 5.633802816901408e-08,
"log_odds_chosen": 0.18586792051792145,
"log_odds_ratio": -0.6605378985404968,
"logits/chosen": -0.8417736291885376,
"logits/rejected": -0.8345857858657837,
"logps/chosen": -0.5810900926589966,
"logps/rejected": -0.6728335022926331,
"loss": 0.5815,
"nll_loss": 0.5025544166564941,
"rewards/accuracies": 0.5099999904632568,
"rewards/chosen": -0.05810901150107384,
"rewards/margins": 0.009174343198537827,
"rewards/rejected": -0.06728334724903107,
"step": 675
},
{
"epoch": 2.9535864978902953,
"grad_norm": 1.9284456011672904,
"learning_rate": 1.7214397496087635e-08,
"log_odds_chosen": 0.1927904486656189,
"log_odds_ratio": -0.6649494171142578,
"logits/chosen": -0.678924024105072,
"logits/rejected": -0.6770960092544556,
"logps/chosen": -0.5262628793716431,
"logps/rejected": -0.580658495426178,
"loss": 0.6001,
"nll_loss": 0.5695458650588989,
"rewards/accuracies": 0.4099999964237213,
"rewards/chosen": -0.05262628570199013,
"rewards/margins": 0.005439565982669592,
"rewards/rejected": -0.05806584656238556,
"step": 700
}
],
"logging_steps": 25,
"max_steps": 711,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}