phi3-medium-spin-zephyr-data / trainer_state.json
Yifan Wang
Update from wang5617
b7116f8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 4689,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.066098081023454e-09,
"logits/generated": 6.313449382781982,
"logits/real": 4.503366947174072,
"logps/generated": -273.765380859375,
"logps/real": -221.5892333984375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.0660980810234541e-08,
"logits/generated": 6.417145729064941,
"logits/real": 4.833721160888672,
"logps/generated": -281.4638366699219,
"logps/real": -251.9319610595703,
"loss": 0.6946,
"rewards/accuracies": 0.0416666679084301,
"rewards/generated": 0.0008282132912427187,
"rewards/margins": -0.0019732369109988213,
"rewards/real": -0.0011450237361714244,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 2.1321961620469082e-08,
"logits/generated": 6.508301734924316,
"logits/real": 4.467240333557129,
"logps/generated": -281.99346923828125,
"logps/real": -244.68807983398438,
"loss": 0.6907,
"rewards/accuracies": 0.42500001192092896,
"rewards/generated": -0.01464166771620512,
"rewards/margins": 0.011996939778327942,
"rewards/real": -0.0026447249110788107,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 3.1982942430703625e-08,
"logits/generated": 6.402890682220459,
"logits/real": 4.9113664627075195,
"logps/generated": -280.8014831542969,
"logps/real": -265.4349365234375,
"loss": 0.6959,
"rewards/accuracies": 0.5874999761581421,
"rewards/generated": 0.0067442902363836765,
"rewards/margins": 0.00477126520127058,
"rewards/real": 0.011515555903315544,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 4.2643923240938164e-08,
"logits/generated": 6.590306758880615,
"logits/real": 4.578217029571533,
"logps/generated": -270.75592041015625,
"logps/real": -240.4984130859375,
"loss": 0.7,
"rewards/accuracies": 0.512499988079071,
"rewards/generated": 0.0005118753761053085,
"rewards/margins": -0.0064646475948393345,
"rewards/real": -0.005952772684395313,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 5.3304904051172704e-08,
"logits/generated": 6.504673957824707,
"logits/real": 4.641873359680176,
"logps/generated": -276.2430419921875,
"logps/real": -224.84249877929688,
"loss": 0.6889,
"rewards/accuracies": 0.512499988079071,
"rewards/generated": -0.033875368535518646,
"rewards/margins": 0.02717522345483303,
"rewards/real": -0.006700146943330765,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 6.396588486140725e-08,
"logits/generated": 6.473546504974365,
"logits/real": 5.055091381072998,
"logps/generated": -289.09674072265625,
"logps/real": -243.2639923095703,
"loss": 0.6977,
"rewards/accuracies": 0.574999988079071,
"rewards/generated": -0.015207557007670403,
"rewards/margins": 0.020673025399446487,
"rewards/real": 0.00546546746045351,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 7.462686567164178e-08,
"logits/generated": 6.487510681152344,
"logits/real": 4.849614143371582,
"logps/generated": -283.6287536621094,
"logps/real": -268.0138854980469,
"loss": 0.6988,
"rewards/accuracies": 0.5,
"rewards/generated": -0.020940685644745827,
"rewards/margins": 0.0006496586138382554,
"rewards/real": -0.020291026681661606,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 8.528784648187633e-08,
"logits/generated": 6.551278114318848,
"logits/real": 4.523703575134277,
"logps/generated": -283.064697265625,
"logps/real": -237.7630157470703,
"loss": 0.6961,
"rewards/accuracies": 0.4625000059604645,
"rewards/generated": -0.003950852435082197,
"rewards/margins": 0.003412533551454544,
"rewards/real": -0.0005383208626881242,
"step": 80
},
{
"epoch": 0.06,
"learning_rate": 9.594882729211087e-08,
"logits/generated": 6.489449501037598,
"logits/real": 4.616640090942383,
"logps/generated": -294.86614990234375,
"logps/real": -257.0780029296875,
"loss": 0.6965,
"rewards/accuracies": 0.512499988079071,
"rewards/generated": -0.001603059470653534,
"rewards/margins": -0.0017856752965599298,
"rewards/real": -0.0033887363970279694,
"step": 90
},
{
"epoch": 0.06,
"learning_rate": 1.0660980810234541e-07,
"logits/generated": 6.500540256500244,
"logits/real": 4.6778364181518555,
"logps/generated": -280.27447509765625,
"logps/real": -240.3990020751953,
"loss": 0.6985,
"rewards/accuracies": 0.48750001192092896,
"rewards/generated": 0.01377450954169035,
"rewards/margins": -0.026643192395567894,
"rewards/real": -0.01286868192255497,
"step": 100
},
{
"epoch": 0.07,
"learning_rate": 1.1727078891257995e-07,
"logits/generated": 6.49435567855835,
"logits/real": 4.850351810455322,
"logps/generated": -284.07623291015625,
"logps/real": -256.1934814453125,
"loss": 0.699,
"rewards/accuracies": 0.5249999761581421,
"rewards/generated": 0.002641477854922414,
"rewards/margins": 0.004401583690196276,
"rewards/real": 0.007043060846626759,
"step": 110
},
{
"epoch": 0.08,
"learning_rate": 1.279317697228145e-07,
"logits/generated": 6.4412078857421875,
"logits/real": 4.881103038787842,
"logps/generated": -281.96685791015625,
"logps/real": -240.1349334716797,
"loss": 0.7007,
"rewards/accuracies": 0.5375000238418579,
"rewards/generated": -0.010973912663757801,
"rewards/margins": -0.00332402135245502,
"rewards/real": -0.014297932386398315,
"step": 120
},
{
"epoch": 0.08,
"learning_rate": 1.3859275053304903e-07,
"logits/generated": 6.572972297668457,
"logits/real": 4.993831634521484,
"logps/generated": -278.8970031738281,
"logps/real": -253.2180633544922,
"loss": 0.6918,
"rewards/accuracies": 0.5625,
"rewards/generated": -0.0238101277500391,
"rewards/margins": 0.015025329776108265,
"rewards/real": -0.00878479890525341,
"step": 130
},
{
"epoch": 0.09,
"learning_rate": 1.4925373134328355e-07,
"logits/generated": 6.496808052062988,
"logits/real": 4.527266979217529,
"logps/generated": -286.42230224609375,
"logps/real": -238.97653198242188,
"loss": 0.6908,
"rewards/accuracies": 0.550000011920929,
"rewards/generated": -0.015367841348052025,
"rewards/margins": 0.015413427725434303,
"rewards/real": 4.558637738227844e-05,
"step": 140
},
{
"epoch": 0.1,
"learning_rate": 1.5991471215351813e-07,
"logits/generated": 6.386403560638428,
"logits/real": 5.1331682205200195,
"logps/generated": -288.5858459472656,
"logps/real": -252.1357879638672,
"loss": 0.6973,
"rewards/accuracies": 0.4749999940395355,
"rewards/generated": 0.001897630630992353,
"rewards/margins": -0.013987990096211433,
"rewards/real": -0.012090359814465046,
"step": 150
},
{
"epoch": 0.1,
"learning_rate": 1.7057569296375266e-07,
"logits/generated": 6.469916343688965,
"logits/real": 4.803389549255371,
"logps/generated": -279.75665283203125,
"logps/real": -249.20278930664062,
"loss": 0.6928,
"rewards/accuracies": 0.4749999940395355,
"rewards/generated": -0.00042929648770950735,
"rewards/margins": -0.006332563702017069,
"rewards/real": -0.0067618610337376595,
"step": 160
},
{
"epoch": 0.11,
"learning_rate": 1.8123667377398718e-07,
"logits/generated": 6.469520568847656,
"logits/real": 4.792954444885254,
"logps/generated": -278.6644592285156,
"logps/real": -238.6741485595703,
"loss": 0.694,
"rewards/accuracies": 0.550000011920929,
"rewards/generated": -0.006913432385772467,
"rewards/margins": 0.017923034727573395,
"rewards/real": 0.01100960187613964,
"step": 170
},
{
"epoch": 0.12,
"learning_rate": 1.9189765458422174e-07,
"logits/generated": 6.304480075836182,
"logits/real": 5.01064395904541,
"logps/generated": -292.3460388183594,
"logps/real": -263.7362976074219,
"loss": 0.7018,
"rewards/accuracies": 0.44999998807907104,
"rewards/generated": 0.00147080363240093,
"rewards/margins": -0.01612243428826332,
"rewards/real": -0.014651629142463207,
"step": 180
},
{
"epoch": 0.12,
"learning_rate": 2.025586353944563e-07,
"logits/generated": 6.497210502624512,
"logits/real": 4.78942346572876,
"logps/generated": -282.570068359375,
"logps/real": -263.93743896484375,
"loss": 0.6982,
"rewards/accuracies": 0.4375,
"rewards/generated": 0.003275098744779825,
"rewards/margins": -0.005347794853150845,
"rewards/real": -0.0020726968068629503,
"step": 190
},
{
"epoch": 0.13,
"learning_rate": 2.1321961620469082e-07,
"logits/generated": 6.557346343994141,
"logits/real": 4.690012454986572,
"logps/generated": -289.92633056640625,
"logps/real": -247.98934936523438,
"loss": 0.6967,
"rewards/accuracies": 0.5,
"rewards/generated": -0.004312173463404179,
"rewards/margins": 0.004754967056214809,
"rewards/real": 0.00044279481517151,
"step": 200
},
{
"epoch": 0.13,
"learning_rate": 2.2388059701492537e-07,
"logits/generated": 6.627219200134277,
"logits/real": 4.5740742683410645,
"logps/generated": -274.96441650390625,
"logps/real": -227.3006591796875,
"loss": 0.6883,
"rewards/accuracies": 0.550000011920929,
"rewards/generated": -0.019664818421006203,
"rewards/margins": 0.017926085740327835,
"rewards/real": -0.0017387343104928732,
"step": 210
},
{
"epoch": 0.14,
"learning_rate": 2.345415778251599e-07,
"logits/generated": 6.484269618988037,
"logits/real": 4.570880889892578,
"logps/generated": -285.11175537109375,
"logps/real": -222.089111328125,
"loss": 0.6939,
"rewards/accuracies": 0.48750001192092896,
"rewards/generated": -0.00420150812715292,
"rewards/margins": -0.003182200016453862,
"rewards/real": -0.007383708842098713,
"step": 220
},
{
"epoch": 0.15,
"learning_rate": 2.452025586353944e-07,
"logits/generated": 6.622496128082275,
"logits/real": 4.402390480041504,
"logps/generated": -283.72247314453125,
"logps/real": -238.875244140625,
"loss": 0.6884,
"rewards/accuracies": 0.625,
"rewards/generated": -0.0316481851041317,
"rewards/margins": 0.037973009049892426,
"rewards/real": 0.00632482161745429,
"step": 230
},
{
"epoch": 0.15,
"learning_rate": 2.55863539445629e-07,
"logits/generated": 6.519092559814453,
"logits/real": 5.1569108963012695,
"logps/generated": -294.76727294921875,
"logps/real": -287.53533935546875,
"loss": 0.6922,
"rewards/accuracies": 0.5375000238418579,
"rewards/generated": -0.0013995547778904438,
"rewards/margins": 0.0036062332801520824,
"rewards/real": 0.002206678967922926,
"step": 240
},
{
"epoch": 0.16,
"learning_rate": 2.665245202558635e-07,
"logits/generated": 6.4200615882873535,
"logits/real": 4.65548038482666,
"logps/generated": -285.3873596191406,
"logps/real": -228.32009887695312,
"loss": 0.6837,
"rewards/accuracies": 0.512499988079071,
"rewards/generated": 0.003386992961168289,
"rewards/margins": 0.010126499459147453,
"rewards/real": 0.013513492420315742,
"step": 250
},
{
"epoch": 0.17,
"learning_rate": 2.7718550106609805e-07,
"logits/generated": 6.4657487869262695,
"logits/real": 4.301976203918457,
"logps/generated": -280.96539306640625,
"logps/real": -224.1595458984375,
"loss": 0.6919,
"rewards/accuracies": 0.550000011920929,
"rewards/generated": -0.006485415156930685,
"rewards/margins": 0.011584701016545296,
"rewards/real": 0.005099285393953323,
"step": 260
},
{
"epoch": 0.17,
"learning_rate": 2.878464818763326e-07,
"logits/generated": 6.452627658843994,
"logits/real": 4.714515209197998,
"logps/generated": -274.6763000488281,
"logps/real": -239.6040802001953,
"loss": 0.6897,
"rewards/accuracies": 0.5625,
"rewards/generated": -0.011514711193740368,
"rewards/margins": 0.023869235068559647,
"rewards/real": 0.012354524806141853,
"step": 270
},
{
"epoch": 0.18,
"learning_rate": 2.985074626865671e-07,
"logits/generated": 6.498050689697266,
"logits/real": 4.76090669631958,
"logps/generated": -270.68817138671875,
"logps/real": -256.23040771484375,
"loss": 0.6875,
"rewards/accuracies": 0.6000000238418579,
"rewards/generated": -0.023040171712636948,
"rewards/margins": 0.00843932293355465,
"rewards/real": -0.014600845053792,
"step": 280
},
{
"epoch": 0.19,
"learning_rate": 3.0916844349680174e-07,
"logits/generated": 6.579047203063965,
"logits/real": 4.5393829345703125,
"logps/generated": -292.82879638671875,
"logps/real": -251.0160369873047,
"loss": 0.6915,
"rewards/accuracies": 0.5249999761581421,
"rewards/generated": -0.0038245960604399443,
"rewards/margins": 0.011936083436012268,
"rewards/real": 0.008111484348773956,
"step": 290
},
{
"epoch": 0.19,
"learning_rate": 3.1982942430703626e-07,
"logits/generated": 6.381264686584473,
"logits/real": 4.537992477416992,
"logps/generated": -273.74224853515625,
"logps/real": -239.7407684326172,
"loss": 0.684,
"rewards/accuracies": 0.512499988079071,
"rewards/generated": -0.004199598915874958,
"rewards/margins": 0.017961198464035988,
"rewards/real": 0.01376159768551588,
"step": 300
},
{
"epoch": 0.2,
"learning_rate": 3.304904051172708e-07,
"logits/generated": 6.469088554382324,
"logits/real": 4.945279598236084,
"logps/generated": -275.0647277832031,
"logps/real": -260.91900634765625,
"loss": 0.6837,
"rewards/accuracies": 0.625,
"rewards/generated": -0.0022400771267712116,
"rewards/margins": 0.012093605473637581,
"rewards/real": 0.009853528812527657,
"step": 310
},
{
"epoch": 0.2,
"learning_rate": 3.411513859275053e-07,
"logits/generated": 6.3869123458862305,
"logits/real": 4.953930854797363,
"logps/generated": -277.23260498046875,
"logps/real": -258.80120849609375,
"loss": 0.685,
"rewards/accuracies": 0.637499988079071,
"rewards/generated": 0.0009156037122011185,
"rewards/margins": 0.01676994189620018,
"rewards/real": 0.01768554374575615,
"step": 320
},
{
"epoch": 0.21,
"learning_rate": 3.5181236673773984e-07,
"logits/generated": 6.399374961853027,
"logits/real": 4.842419147491455,
"logps/generated": -270.574951171875,
"logps/real": -259.90386962890625,
"loss": 0.6783,
"rewards/accuracies": 0.6625000238418579,
"rewards/generated": -0.014367667958140373,
"rewards/margins": 0.042558759450912476,
"rewards/real": 0.0281910952180624,
"step": 330
},
{
"epoch": 0.22,
"learning_rate": 3.6247334754797437e-07,
"logits/generated": 6.49670934677124,
"logits/real": 4.750965118408203,
"logps/generated": -281.72406005859375,
"logps/real": -254.224853515625,
"loss": 0.6784,
"rewards/accuracies": 0.574999988079071,
"rewards/generated": 0.0038049505092203617,
"rewards/margins": 0.02935839258134365,
"rewards/real": 0.03316333517432213,
"step": 340
},
{
"epoch": 0.22,
"learning_rate": 3.7313432835820895e-07,
"logits/generated": 6.555943965911865,
"logits/real": 4.749678134918213,
"logps/generated": -271.36529541015625,
"logps/real": -246.26235961914062,
"loss": 0.6711,
"rewards/accuracies": 0.6499999761581421,
"rewards/generated": -0.008218175731599331,
"rewards/margins": 0.044897980988025665,
"rewards/real": 0.03667980432510376,
"step": 350
},
{
"epoch": 0.23,
"learning_rate": 3.8379530916844347e-07,
"logits/generated": 6.510591983795166,
"logits/real": 4.735245227813721,
"logps/generated": -277.96197509765625,
"logps/real": -237.43386840820312,
"loss": 0.6671,
"rewards/accuracies": 0.6499999761581421,
"rewards/generated": -0.022423155605793,
"rewards/margins": 0.0445123165845871,
"rewards/real": 0.022089168429374695,
"step": 360
},
{
"epoch": 0.24,
"learning_rate": 3.9445628997867805e-07,
"logits/generated": 6.379269599914551,
"logits/real": 5.035122871398926,
"logps/generated": -270.5735778808594,
"logps/real": -274.4202880859375,
"loss": 0.6704,
"rewards/accuracies": 0.637499988079071,
"rewards/generated": -0.0076493457891047,
"rewards/margins": 0.043878089636564255,
"rewards/real": 0.03622874245047569,
"step": 370
},
{
"epoch": 0.24,
"learning_rate": 4.051172707889126e-07,
"logits/generated": 6.588892459869385,
"logits/real": 4.547529697418213,
"logps/generated": -293.5452880859375,
"logps/real": -258.05047607421875,
"loss": 0.67,
"rewards/accuracies": 0.574999988079071,
"rewards/generated": -0.014542962424457073,
"rewards/margins": 0.04654566943645477,
"rewards/real": 0.032002706080675125,
"step": 380
},
{
"epoch": 0.25,
"learning_rate": 4.157782515991471e-07,
"logits/generated": 6.528749942779541,
"logits/real": 4.817520618438721,
"logps/generated": -278.42205810546875,
"logps/real": -245.11782836914062,
"loss": 0.6648,
"rewards/accuracies": 0.637499988079071,
"rewards/generated": -0.01723286882042885,
"rewards/margins": 0.05723923444747925,
"rewards/real": 0.0400063656270504,
"step": 390
},
{
"epoch": 0.26,
"learning_rate": 4.2643923240938163e-07,
"logits/generated": 6.4185590744018555,
"logits/real": 4.532934665679932,
"logps/generated": -268.5301208496094,
"logps/real": -250.79397583007812,
"loss": 0.6603,
"rewards/accuracies": 0.7124999761581421,
"rewards/generated": -0.02642243169248104,
"rewards/margins": 0.08120186626911163,
"rewards/real": 0.05477944016456604,
"step": 400
},
{
"epoch": 0.26,
"learning_rate": 4.371002132196162e-07,
"logits/generated": 6.537232875823975,
"logits/real": 4.77290678024292,
"logps/generated": -285.8045959472656,
"logps/real": -244.49655151367188,
"loss": 0.6525,
"rewards/accuracies": 0.7749999761581421,
"rewards/generated": -0.012522486969828606,
"rewards/margins": 0.08892401307821274,
"rewards/real": 0.07640153169631958,
"step": 410
},
{
"epoch": 0.27,
"learning_rate": 4.4776119402985074e-07,
"logits/generated": 6.560798645019531,
"logits/real": 4.509766578674316,
"logps/generated": -264.9848327636719,
"logps/real": -250.44418334960938,
"loss": 0.6491,
"rewards/accuracies": 0.6625000238418579,
"rewards/generated": -0.00984070636332035,
"rewards/margins": 0.07440716028213501,
"rewards/real": 0.06456644833087921,
"step": 420
},
{
"epoch": 0.28,
"learning_rate": 4.5842217484008526e-07,
"logits/generated": 6.5955400466918945,
"logits/real": 4.605684757232666,
"logps/generated": -271.46160888671875,
"logps/real": -233.9333953857422,
"loss": 0.6469,
"rewards/accuracies": 0.75,
"rewards/generated": -0.016519565135240555,
"rewards/margins": 0.10620995610952377,
"rewards/real": 0.08969040215015411,
"step": 430
},
{
"epoch": 0.28,
"learning_rate": 4.690831556503198e-07,
"logits/generated": 6.498379707336426,
"logits/real": 4.555215358734131,
"logps/generated": -275.9375,
"logps/real": -245.8618621826172,
"loss": 0.6381,
"rewards/accuracies": 0.7250000238418579,
"rewards/generated": -0.019702184945344925,
"rewards/margins": 0.10273710638284683,
"rewards/real": 0.08303491771221161,
"step": 440
},
{
"epoch": 0.29,
"learning_rate": 4.797441364605543e-07,
"logits/generated": 6.488035678863525,
"logits/real": 4.758559226989746,
"logps/generated": -292.8480224609375,
"logps/real": -235.81704711914062,
"loss": 0.6266,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -0.0321931466460228,
"rewards/margins": 0.13845106959342957,
"rewards/real": 0.10625793039798737,
"step": 450
},
{
"epoch": 0.29,
"learning_rate": 4.904051172707888e-07,
"logits/generated": 6.370500564575195,
"logits/real": 5.3891401290893555,
"logps/generated": -288.18426513671875,
"logps/real": -262.585693359375,
"loss": 0.615,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -0.047702450305223465,
"rewards/margins": 0.16840405762195587,
"rewards/real": 0.12070159614086151,
"step": 460
},
{
"epoch": 0.3,
"learning_rate": 4.998815165876776e-07,
"logits/generated": 6.4978346824646,
"logits/real": 4.823352813720703,
"logps/generated": -282.669921875,
"logps/real": -243.49526977539062,
"loss": 0.616,
"rewards/accuracies": 0.862500011920929,
"rewards/generated": -0.03011218085885048,
"rewards/margins": 0.1787882149219513,
"rewards/real": 0.14867602288722992,
"step": 470
},
{
"epoch": 0.31,
"learning_rate": 4.98696682464455e-07,
"logits/generated": 6.336424350738525,
"logits/real": 4.733750343322754,
"logps/generated": -267.9712829589844,
"logps/real": -241.704345703125,
"loss": 0.6071,
"rewards/accuracies": 0.8374999761581421,
"rewards/generated": -0.01858561486005783,
"rewards/margins": 0.17029884457588196,
"rewards/real": 0.15171322226524353,
"step": 480
},
{
"epoch": 0.31,
"learning_rate": 4.975118483412322e-07,
"logits/generated": 6.465763092041016,
"logits/real": 4.750241279602051,
"logps/generated": -276.46527099609375,
"logps/real": -227.153564453125,
"loss": 0.5994,
"rewards/accuracies": 0.8125,
"rewards/generated": -0.020980175584554672,
"rewards/margins": 0.18550223112106323,
"rewards/real": 0.16452205181121826,
"step": 490
},
{
"epoch": 0.32,
"learning_rate": 4.963270142180094e-07,
"logits/generated": 6.40543270111084,
"logits/real": 4.948707103729248,
"logps/generated": -280.562255859375,
"logps/real": -245.1046142578125,
"loss": 0.5866,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -0.08627736568450928,
"rewards/margins": 0.2752222418785095,
"rewards/real": 0.18894490599632263,
"step": 500
},
{
"epoch": 0.33,
"learning_rate": 4.951421800947867e-07,
"logits/generated": 6.454255104064941,
"logits/real": 4.428702354431152,
"logps/generated": -277.8967590332031,
"logps/real": -239.1210479736328,
"loss": 0.5713,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.04224959388375282,
"rewards/margins": 0.2869378924369812,
"rewards/real": 0.24468836188316345,
"step": 510
},
{
"epoch": 0.33,
"learning_rate": 4.93957345971564e-07,
"logits/generated": 6.436443328857422,
"logits/real": 4.778555393218994,
"logps/generated": -269.21307373046875,
"logps/real": -231.3462677001953,
"loss": 0.5635,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -0.027034465223550797,
"rewards/margins": 0.30413612723350525,
"rewards/real": 0.27710166573524475,
"step": 520
},
{
"epoch": 0.34,
"learning_rate": 4.927725118483413e-07,
"logits/generated": 6.379159927368164,
"logits/real": 4.740503787994385,
"logps/generated": -271.0333557128906,
"logps/real": -231.5590057373047,
"loss": 0.5598,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.03343794867396355,
"rewards/margins": 0.2944517731666565,
"rewards/real": 0.26101383566856384,
"step": 530
},
{
"epoch": 0.35,
"learning_rate": 4.915876777251184e-07,
"logits/generated": 6.4404754638671875,
"logits/real": 4.698214054107666,
"logps/generated": -269.7973937988281,
"logps/real": -245.9893798828125,
"loss": 0.5512,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.03939288109540939,
"rewards/margins": 0.3222863972187042,
"rewards/real": 0.28289347887039185,
"step": 540
},
{
"epoch": 0.35,
"learning_rate": 4.904028436018957e-07,
"logits/generated": 6.421341896057129,
"logits/real": 4.729471683502197,
"logps/generated": -283.0313720703125,
"logps/real": -245.7028350830078,
"loss": 0.5334,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.06327588111162186,
"rewards/margins": 0.36130860447883606,
"rewards/real": 0.2980327010154724,
"step": 550
},
{
"epoch": 0.36,
"learning_rate": 4.892180094786729e-07,
"logits/generated": 6.3695831298828125,
"logits/real": 4.896943092346191,
"logps/generated": -275.6328125,
"logps/real": -234.8939666748047,
"loss": 0.5383,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.043337948620319366,
"rewards/margins": 0.3582867383956909,
"rewards/real": 0.31494876742362976,
"step": 560
},
{
"epoch": 0.36,
"learning_rate": 4.880331753554502e-07,
"logits/generated": 6.409314155578613,
"logits/real": 4.403968811035156,
"logps/generated": -288.6573181152344,
"logps/real": -252.0238800048828,
"loss": 0.5142,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.09962789714336395,
"rewards/margins": 0.4413130283355713,
"rewards/real": 0.34168511629104614,
"step": 570
},
{
"epoch": 0.37,
"learning_rate": 4.868483412322275e-07,
"logits/generated": 6.514178276062012,
"logits/real": 4.654687404632568,
"logps/generated": -276.91796875,
"logps/real": -247.6042938232422,
"loss": 0.5149,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.07225228101015091,
"rewards/margins": 0.36416369676589966,
"rewards/real": 0.29191142320632935,
"step": 580
},
{
"epoch": 0.38,
"learning_rate": 4.856635071090047e-07,
"logits/generated": 6.484377861022949,
"logits/real": 4.610289096832275,
"logps/generated": -287.1705322265625,
"logps/real": -248.52066040039062,
"loss": 0.4863,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.12423114478588104,
"rewards/margins": 0.5095083713531494,
"rewards/real": 0.38527724146842957,
"step": 590
},
{
"epoch": 0.38,
"learning_rate": 4.84478672985782e-07,
"logits/generated": 6.429436683654785,
"logits/real": 4.436240196228027,
"logps/generated": -285.28814697265625,
"logps/real": -233.54312133789062,
"loss": 0.4702,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.11845574527978897,
"rewards/margins": 0.5989123582839966,
"rewards/real": 0.4804566502571106,
"step": 600
},
{
"epoch": 0.39,
"learning_rate": 4.832938388625591e-07,
"logits/generated": 6.502951145172119,
"logits/real": 4.6671462059021,
"logps/generated": -281.2021179199219,
"logps/real": -235.6467742919922,
"loss": 0.4764,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.07756136357784271,
"rewards/margins": 0.5416086316108704,
"rewards/real": 0.46404728293418884,
"step": 610
},
{
"epoch": 0.4,
"learning_rate": 4.821090047393365e-07,
"logits/generated": 6.4275922775268555,
"logits/real": 4.628044128417969,
"logps/generated": -287.75958251953125,
"logps/real": -230.74954223632812,
"loss": 0.4542,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.11417678743600845,
"rewards/margins": 0.655004620552063,
"rewards/real": 0.5408278703689575,
"step": 620
},
{
"epoch": 0.4,
"learning_rate": 4.809241706161137e-07,
"logits/generated": 6.4412336349487305,
"logits/real": 5.0383100509643555,
"logps/generated": -262.37652587890625,
"logps/real": -250.28353881835938,
"loss": 0.4457,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.051359303295612335,
"rewards/margins": 0.6307464838027954,
"rewards/real": 0.5793871879577637,
"step": 630
},
{
"epoch": 0.41,
"learning_rate": 4.79739336492891e-07,
"logits/generated": 6.4190993309021,
"logits/real": 4.736918926239014,
"logps/generated": -292.39813232421875,
"logps/real": -238.57089233398438,
"loss": 0.4305,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.06552235782146454,
"rewards/margins": 0.6890299916267395,
"rewards/real": 0.623507559299469,
"step": 640
},
{
"epoch": 0.42,
"learning_rate": 4.785545023696682e-07,
"logits/generated": 6.509694576263428,
"logits/real": 4.679415702819824,
"logps/generated": -283.118896484375,
"logps/real": -251.4104766845703,
"loss": 0.414,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.12958040833473206,
"rewards/margins": 0.7429105043411255,
"rewards/real": 0.613330066204071,
"step": 650
},
{
"epoch": 0.42,
"learning_rate": 4.773696682464455e-07,
"logits/generated": 6.4275031089782715,
"logits/real": 4.612570285797119,
"logps/generated": -283.679931640625,
"logps/real": -234.7230987548828,
"loss": 0.4117,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.12087702751159668,
"rewards/margins": 0.8004452586174011,
"rewards/real": 0.679568350315094,
"step": 660
},
{
"epoch": 0.43,
"learning_rate": 4.7618483412322273e-07,
"logits/generated": 6.529521942138672,
"logits/real": 4.53403377532959,
"logps/generated": -276.1392822265625,
"logps/real": -226.3787078857422,
"loss": 0.4186,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.10057850182056427,
"rewards/margins": 0.805554211139679,
"rewards/real": 0.7049756646156311,
"step": 670
},
{
"epoch": 0.44,
"learning_rate": 4.7499999999999995e-07,
"logits/generated": 6.591378688812256,
"logits/real": 4.763890743255615,
"logps/generated": -295.3109436035156,
"logps/real": -235.9544677734375,
"loss": 0.3916,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.20255105197429657,
"rewards/margins": 0.8574334979057312,
"rewards/real": 0.654882550239563,
"step": 680
},
{
"epoch": 0.44,
"learning_rate": 4.738151658767772e-07,
"logits/generated": 6.50357723236084,
"logits/real": 4.819157123565674,
"logps/generated": -287.8582763671875,
"logps/real": -247.63803100585938,
"loss": 0.381,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.17564311623573303,
"rewards/margins": 0.9119162559509277,
"rewards/real": 0.7362731695175171,
"step": 690
},
{
"epoch": 0.45,
"learning_rate": 4.726303317535545e-07,
"logits/generated": 6.48019552230835,
"logits/real": 4.711845397949219,
"logps/generated": -269.28668212890625,
"logps/real": -223.31192016601562,
"loss": 0.385,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.08161990344524384,
"rewards/margins": 0.8838006258010864,
"rewards/real": 0.8021806478500366,
"step": 700
},
{
"epoch": 0.45,
"learning_rate": 4.7144549763033177e-07,
"logits/generated": 6.481300354003906,
"logits/real": 4.540422439575195,
"logps/generated": -285.6842346191406,
"logps/real": -235.37417602539062,
"loss": 0.3653,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.19102832674980164,
"rewards/margins": 0.9631514549255371,
"rewards/real": 0.7721230983734131,
"step": 710
},
{
"epoch": 0.46,
"learning_rate": 4.70260663507109e-07,
"logits/generated": 6.4215497970581055,
"logits/real": 4.966759204864502,
"logps/generated": -279.190185546875,
"logps/real": -242.8605194091797,
"loss": 0.352,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.12265386432409286,
"rewards/margins": 0.973602294921875,
"rewards/real": 0.8509486317634583,
"step": 720
},
{
"epoch": 0.47,
"learning_rate": 4.690758293838862e-07,
"logits/generated": 6.504040718078613,
"logits/real": 5.0368146896362305,
"logps/generated": -269.21197509765625,
"logps/real": -249.8148956298828,
"loss": 0.3605,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.09707117080688477,
"rewards/margins": 0.9983582496643066,
"rewards/real": 0.9012872576713562,
"step": 730
},
{
"epoch": 0.47,
"learning_rate": 4.678909952606635e-07,
"logits/generated": 6.4590349197387695,
"logits/real": 4.72456693649292,
"logps/generated": -287.1020202636719,
"logps/real": -243.59365844726562,
"loss": 0.3471,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.19013145565986633,
"rewards/margins": 1.1382641792297363,
"rewards/real": 0.9481328129768372,
"step": 740
},
{
"epoch": 0.48,
"learning_rate": 4.667061611374407e-07,
"logits/generated": 6.586329460144043,
"logits/real": 4.696109771728516,
"logps/generated": -294.53399658203125,
"logps/real": -256.1171569824219,
"loss": 0.3363,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.23052604496479034,
"rewards/margins": 1.134413242340088,
"rewards/real": 0.9038872718811035,
"step": 750
},
{
"epoch": 0.49,
"learning_rate": 4.65521327014218e-07,
"logits/generated": 6.411507606506348,
"logits/real": 5.131626129150391,
"logps/generated": -288.7245178222656,
"logps/real": -269.36541748046875,
"loss": 0.3392,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.23499789834022522,
"rewards/margins": 1.1348694562911987,
"rewards/real": 0.8998715281486511,
"step": 760
},
{
"epoch": 0.49,
"learning_rate": 4.6433649289099525e-07,
"logits/generated": 6.4309258460998535,
"logits/real": 4.649967193603516,
"logps/generated": -277.1844787597656,
"logps/real": -242.71011352539062,
"loss": 0.3126,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.2691335380077362,
"rewards/margins": 1.2196036577224731,
"rewards/real": 0.9504700899124146,
"step": 770
},
{
"epoch": 0.5,
"learning_rate": 4.631516587677725e-07,
"logits/generated": 6.520060062408447,
"logits/real": 4.70443058013916,
"logps/generated": -276.5824890136719,
"logps/real": -213.98782348632812,
"loss": 0.3163,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.2217775583267212,
"rewards/margins": 1.2883174419403076,
"rewards/real": 1.0665398836135864,
"step": 780
},
{
"epoch": 0.51,
"learning_rate": 4.6196682464454974e-07,
"logits/generated": 6.340624809265137,
"logits/real": 4.910890102386475,
"logps/generated": -276.492431640625,
"logps/real": -250.92135620117188,
"loss": 0.3078,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.26993709802627563,
"rewards/margins": 1.3064370155334473,
"rewards/real": 1.0364999771118164,
"step": 790
},
{
"epoch": 0.51,
"learning_rate": 4.60781990521327e-07,
"logits/generated": 6.535134315490723,
"logits/real": 4.642252445220947,
"logps/generated": -282.04254150390625,
"logps/real": -224.3877716064453,
"loss": 0.2998,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.2987816333770752,
"rewards/margins": 1.3563276529312134,
"rewards/real": 1.057545781135559,
"step": 800
},
{
"epoch": 0.52,
"learning_rate": 4.5959715639810423e-07,
"logits/generated": 6.41034460067749,
"logits/real": 4.7313551902771,
"logps/generated": -269.9966735839844,
"logps/real": -222.67361450195312,
"loss": 0.3116,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -0.1768857091665268,
"rewards/margins": 1.3481709957122803,
"rewards/real": 1.1712852716445923,
"step": 810
},
{
"epoch": 0.52,
"learning_rate": 4.5841232227488145e-07,
"logits/generated": 6.5445876121521,
"logits/real": 4.706090450286865,
"logps/generated": -292.2023010253906,
"logps/real": -230.2037353515625,
"loss": 0.3059,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.3371911942958832,
"rewards/margins": 1.470198392868042,
"rewards/real": 1.1330074071884155,
"step": 820
},
{
"epoch": 0.53,
"learning_rate": 4.5722748815165873e-07,
"logits/generated": 6.492764949798584,
"logits/real": 4.727120876312256,
"logps/generated": -286.11614990234375,
"logps/real": -236.4132080078125,
"loss": 0.2588,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.34763607382774353,
"rewards/margins": 1.618819236755371,
"rewards/real": 1.2711832523345947,
"step": 830
},
{
"epoch": 0.54,
"learning_rate": 4.56042654028436e-07,
"logits/generated": 6.487623691558838,
"logits/real": 4.916855335235596,
"logps/generated": -301.30670166015625,
"logps/real": -261.5294494628906,
"loss": 0.2905,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -0.26863616704940796,
"rewards/margins": 1.2958793640136719,
"rewards/real": 1.0272432565689087,
"step": 840
},
{
"epoch": 0.54,
"learning_rate": 4.5485781990521327e-07,
"logits/generated": 6.653237342834473,
"logits/real": 4.623661518096924,
"logps/generated": -285.1461181640625,
"logps/real": -243.27243041992188,
"loss": 0.3001,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.20099958777427673,
"rewards/margins": 1.3751389980316162,
"rewards/real": 1.174139380455017,
"step": 850
},
{
"epoch": 0.55,
"learning_rate": 4.536729857819905e-07,
"logits/generated": 6.405764579772949,
"logits/real": 4.904845714569092,
"logps/generated": -265.61737060546875,
"logps/real": -236.12594604492188,
"loss": 0.2656,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.2864135205745697,
"rewards/margins": 1.4618184566497803,
"rewards/real": 1.1754049062728882,
"step": 860
},
{
"epoch": 0.56,
"learning_rate": 4.5248815165876776e-07,
"logits/generated": 6.408907890319824,
"logits/real": 4.733492374420166,
"logps/generated": -282.3438415527344,
"logps/real": -248.62728881835938,
"loss": 0.2501,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.32550299167633057,
"rewards/margins": 1.5754355192184448,
"rewards/real": 1.2499325275421143,
"step": 870
},
{
"epoch": 0.56,
"learning_rate": 4.5130331753554504e-07,
"logits/generated": 6.523440361022949,
"logits/real": 4.893500804901123,
"logps/generated": -274.6362609863281,
"logps/real": -239.6786346435547,
"loss": 0.2517,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.37395572662353516,
"rewards/margins": 1.6604623794555664,
"rewards/real": 1.2865066528320312,
"step": 880
},
{
"epoch": 0.57,
"learning_rate": 4.5011848341232226e-07,
"logits/generated": 6.497984886169434,
"logits/real": 4.835686683654785,
"logps/generated": -284.5157165527344,
"logps/real": -240.1763458251953,
"loss": 0.2639,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.3406885862350464,
"rewards/margins": 1.6514053344726562,
"rewards/real": 1.3107168674468994,
"step": 890
},
{
"epoch": 0.58,
"learning_rate": 4.489336492890995e-07,
"logits/generated": 6.601628303527832,
"logits/real": 4.59323787689209,
"logps/generated": -289.57421875,
"logps/real": -227.0326690673828,
"loss": 0.2481,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.4485829770565033,
"rewards/margins": 1.7304086685180664,
"rewards/real": 1.2818256616592407,
"step": 900
},
{
"epoch": 0.58,
"learning_rate": 4.4774881516587675e-07,
"logits/generated": 6.438521385192871,
"logits/real": 5.212011814117432,
"logps/generated": -274.275146484375,
"logps/real": -256.7066345214844,
"loss": 0.2254,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.3772863745689392,
"rewards/margins": 1.8105132579803467,
"rewards/real": 1.4332268238067627,
"step": 910
},
{
"epoch": 0.59,
"learning_rate": 4.46563981042654e-07,
"logits/generated": 6.509451389312744,
"logits/real": 4.610627174377441,
"logps/generated": -291.47552490234375,
"logps/real": -251.83706665039062,
"loss": 0.2307,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.4892211854457855,
"rewards/margins": 1.7918579578399658,
"rewards/real": 1.3026366233825684,
"step": 920
},
{
"epoch": 0.6,
"learning_rate": 4.4537914691943124e-07,
"logits/generated": 6.490546226501465,
"logits/real": 4.814209938049316,
"logps/generated": -273.9214782714844,
"logps/real": -221.7511444091797,
"loss": 0.2508,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.4122149348258972,
"rewards/margins": 1.7060003280639648,
"rewards/real": 1.2937853336334229,
"step": 930
},
{
"epoch": 0.6,
"learning_rate": 4.441943127962085e-07,
"logits/generated": 6.425353050231934,
"logits/real": 5.0255045890808105,
"logps/generated": -280.7168884277344,
"logps/real": -247.033935546875,
"loss": 0.2304,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.47266262769699097,
"rewards/margins": 1.8882691860198975,
"rewards/real": 1.4156067371368408,
"step": 940
},
{
"epoch": 0.61,
"learning_rate": 4.430094786729858e-07,
"logits/generated": 6.480774879455566,
"logits/real": 4.786948204040527,
"logps/generated": -291.06512451171875,
"logps/real": -209.83615112304688,
"loss": 0.2214,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.4364975094795227,
"rewards/margins": 1.8951022624969482,
"rewards/real": 1.4586045742034912,
"step": 950
},
{
"epoch": 0.61,
"learning_rate": 4.4182464454976306e-07,
"logits/generated": 6.375167369842529,
"logits/real": 5.035046577453613,
"logps/generated": -276.30718994140625,
"logps/real": -235.35733032226562,
"loss": 0.2082,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.40835681557655334,
"rewards/margins": 2.0441970825195312,
"rewards/real": 1.6358401775360107,
"step": 960
},
{
"epoch": 0.62,
"learning_rate": 4.4063981042654023e-07,
"logits/generated": 6.5355730056762695,
"logits/real": 4.934959888458252,
"logps/generated": -287.76385498046875,
"logps/real": -228.20425415039062,
"loss": 0.1887,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.589410126209259,
"rewards/margins": 2.164210557937622,
"rewards/real": 1.5748002529144287,
"step": 970
},
{
"epoch": 0.63,
"learning_rate": 4.394549763033175e-07,
"logits/generated": 6.452719211578369,
"logits/real": 4.160326957702637,
"logps/generated": -283.5166931152344,
"logps/real": -217.1543426513672,
"loss": 0.2238,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.24201051890850067,
"rewards/margins": 2.0650689601898193,
"rewards/real": 1.8230584859848022,
"step": 980
},
{
"epoch": 0.63,
"learning_rate": 4.382701421800948e-07,
"logits/generated": 6.507842063903809,
"logits/real": 5.069902420043945,
"logps/generated": -269.7446594238281,
"logps/real": -248.04714965820312,
"loss": 0.1762,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.44552716612815857,
"rewards/margins": 2.1091365814208984,
"rewards/real": 1.6636091470718384,
"step": 990
},
{
"epoch": 0.64,
"learning_rate": 4.37085308056872e-07,
"logits/generated": 6.380208492279053,
"logits/real": 4.518065452575684,
"logps/generated": -274.2218322753906,
"logps/real": -219.1566925048828,
"loss": 0.2048,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.46542254090309143,
"rewards/margins": 2.0480804443359375,
"rewards/real": 1.5826579332351685,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 4.3590047393364927e-07,
"logits/generated": 6.60736608505249,
"logits/real": 4.692660331726074,
"logps/generated": -273.3490295410156,
"logps/real": -219.55429077148438,
"loss": 0.2376,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.2550446093082428,
"rewards/margins": 1.8392921686172485,
"rewards/real": 1.5842477083206177,
"step": 1010
},
{
"epoch": 0.65,
"learning_rate": 4.3471563981042654e-07,
"logits/generated": 6.539947509765625,
"logits/real": 4.22275447845459,
"logps/generated": -281.519775390625,
"logps/real": -199.8513641357422,
"loss": 0.1947,
"rewards/accuracies": 1.0,
"rewards/generated": -0.596612811088562,
"rewards/margins": 2.411491870880127,
"rewards/real": 1.8148788213729858,
"step": 1020
},
{
"epoch": 0.66,
"learning_rate": 4.335308056872038e-07,
"logits/generated": 6.498897552490234,
"logits/real": 4.682340621948242,
"logps/generated": -276.3707580566406,
"logps/real": -240.2972412109375,
"loss": 0.2016,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.45306864380836487,
"rewards/margins": 1.9954931735992432,
"rewards/real": 1.5424243211746216,
"step": 1030
},
{
"epoch": 0.67,
"learning_rate": 4.32345971563981e-07,
"logits/generated": 6.35316276550293,
"logits/real": 4.76320219039917,
"logps/generated": -291.69427490234375,
"logps/real": -244.2530517578125,
"loss": 0.1921,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.6429895162582397,
"rewards/margins": 2.3938496112823486,
"rewards/real": 1.7508599758148193,
"step": 1040
},
{
"epoch": 0.67,
"learning_rate": 4.3116113744075825e-07,
"logits/generated": 6.538656711578369,
"logits/real": 4.506134986877441,
"logps/generated": -297.8096923828125,
"logps/real": -223.4852294921875,
"loss": 0.1945,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.7660019397735596,
"rewards/margins": 2.628037214279175,
"rewards/real": 1.8620353937149048,
"step": 1050
},
{
"epoch": 0.68,
"learning_rate": 4.299763033175355e-07,
"logits/generated": 6.559047698974609,
"logits/real": 4.753483772277832,
"logps/generated": -283.09259033203125,
"logps/real": -231.58828735351562,
"loss": 0.1659,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.5766782164573669,
"rewards/margins": 2.1829142570495605,
"rewards/real": 1.6062358617782593,
"step": 1060
},
{
"epoch": 0.68,
"learning_rate": 4.2879146919431274e-07,
"logits/generated": 6.543219089508057,
"logits/real": 4.634883880615234,
"logps/generated": -292.83892822265625,
"logps/real": -237.899658203125,
"loss": 0.1848,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.7167092561721802,
"rewards/margins": 2.470365524291992,
"rewards/real": 1.7536563873291016,
"step": 1070
},
{
"epoch": 0.69,
"learning_rate": 4.2760663507109e-07,
"logits/generated": 6.605074405670166,
"logits/real": 4.641299724578857,
"logps/generated": -273.18243408203125,
"logps/real": -222.3721923828125,
"loss": 0.1918,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.5909595489501953,
"rewards/margins": 2.6222665309906006,
"rewards/real": 2.031306743621826,
"step": 1080
},
{
"epoch": 0.7,
"learning_rate": 4.264218009478673e-07,
"logits/generated": 6.473427772521973,
"logits/real": 4.688056945800781,
"logps/generated": -294.8439636230469,
"logps/real": -218.5131378173828,
"loss": 0.184,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.6649213433265686,
"rewards/margins": 2.604104518890381,
"rewards/real": 1.9391835927963257,
"step": 1090
},
{
"epoch": 0.7,
"learning_rate": 4.2523696682464456e-07,
"logits/generated": 6.37612247467041,
"logits/real": 4.925015449523926,
"logps/generated": -288.498779296875,
"logps/real": -232.14883422851562,
"loss": 0.1871,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.5896228551864624,
"rewards/margins": 2.15082049369812,
"rewards/real": 1.5611976385116577,
"step": 1100
},
{
"epoch": 0.71,
"learning_rate": 4.240521327014218e-07,
"logits/generated": 6.474294185638428,
"logits/real": 4.607337474822998,
"logps/generated": -271.22357177734375,
"logps/real": -217.6533660888672,
"loss": 0.1953,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.616405189037323,
"rewards/margins": 2.434063196182251,
"rewards/real": 1.8176580667495728,
"step": 1110
},
{
"epoch": 0.72,
"learning_rate": 4.22867298578199e-07,
"logits/generated": 6.460890769958496,
"logits/real": 4.362028121948242,
"logps/generated": -294.4446105957031,
"logps/real": -233.0,
"loss": 0.1522,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.9041115641593933,
"rewards/margins": 2.483468770980835,
"rewards/real": 1.5793571472167969,
"step": 1120
},
{
"epoch": 0.72,
"learning_rate": 4.216824644549763e-07,
"logits/generated": 6.5067267417907715,
"logits/real": 4.801357269287109,
"logps/generated": -283.41253662109375,
"logps/real": -229.07308959960938,
"loss": 0.1674,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.8184356689453125,
"rewards/margins": 2.9656457901000977,
"rewards/real": 2.147209644317627,
"step": 1130
},
{
"epoch": 0.73,
"learning_rate": 4.2049763033175355e-07,
"logits/generated": 6.525651454925537,
"logits/real": 4.5972700119018555,
"logps/generated": -273.68255615234375,
"logps/real": -226.0257568359375,
"loss": 0.1825,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.7906166315078735,
"rewards/margins": 2.4105656147003174,
"rewards/real": 1.6199489831924438,
"step": 1140
},
{
"epoch": 0.74,
"learning_rate": 4.1931279620853077e-07,
"logits/generated": 6.486138820648193,
"logits/real": 4.842376232147217,
"logps/generated": -297.69329833984375,
"logps/real": -236.61843872070312,
"loss": 0.1377,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.8111250996589661,
"rewards/margins": 2.760737657546997,
"rewards/real": 1.9496123790740967,
"step": 1150
},
{
"epoch": 0.74,
"learning_rate": 4.1812796208530804e-07,
"logits/generated": 6.288398265838623,
"logits/real": 5.120048522949219,
"logps/generated": -302.489990234375,
"logps/real": -220.8452606201172,
"loss": 0.1719,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.9194461107254028,
"rewards/margins": 2.725492000579834,
"rewards/real": 1.8060458898544312,
"step": 1160
},
{
"epoch": 0.75,
"learning_rate": 4.169431279620853e-07,
"logits/generated": 6.422547817230225,
"logits/real": 5.059387683868408,
"logps/generated": -285.9918518066406,
"logps/real": -246.36697387695312,
"loss": 0.1815,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.8091152310371399,
"rewards/margins": 2.879631280899048,
"rewards/real": 2.0705161094665527,
"step": 1170
},
{
"epoch": 0.75,
"learning_rate": 4.1575829383886253e-07,
"logits/generated": 6.4782843589782715,
"logits/real": 4.969704627990723,
"logps/generated": -287.41973876953125,
"logps/real": -207.5157928466797,
"loss": 0.191,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.8577953577041626,
"rewards/margins": 2.5767669677734375,
"rewards/real": 1.718971848487854,
"step": 1180
},
{
"epoch": 0.76,
"learning_rate": 4.145734597156398e-07,
"logits/generated": 6.450139045715332,
"logits/real": 4.915671348571777,
"logps/generated": -270.8932189941406,
"logps/real": -217.2313995361328,
"loss": 0.172,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -0.7773370146751404,
"rewards/margins": 2.6479990482330322,
"rewards/real": 1.870661973953247,
"step": 1190
},
{
"epoch": 0.77,
"learning_rate": 4.1338862559241703e-07,
"logits/generated": 6.705965518951416,
"logits/real": 4.81022834777832,
"logps/generated": -294.56597900390625,
"logps/real": -211.1824493408203,
"loss": 0.16,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.0764670372009277,
"rewards/margins": 3.1245856285095215,
"rewards/real": 2.0481185913085938,
"step": 1200
},
{
"epoch": 0.77,
"learning_rate": 4.122037914691943e-07,
"logits/generated": 6.361940860748291,
"logits/real": 4.980579376220703,
"logps/generated": -287.1803283691406,
"logps/real": -244.1328125,
"loss": 0.1562,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.976216197013855,
"rewards/margins": 2.9318318367004395,
"rewards/real": 1.9556156396865845,
"step": 1210
},
{
"epoch": 0.78,
"learning_rate": 4.110189573459715e-07,
"logits/generated": 6.496147155761719,
"logits/real": 4.533209800720215,
"logps/generated": -292.2799377441406,
"logps/real": -220.11685180664062,
"loss": 0.145,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -0.9167261123657227,
"rewards/margins": 3.0090811252593994,
"rewards/real": 2.0923550128936768,
"step": 1220
},
{
"epoch": 0.79,
"learning_rate": 4.098341232227488e-07,
"logits/generated": 6.39020299911499,
"logits/real": 4.5528459548950195,
"logps/generated": -286.52435302734375,
"logps/real": -214.3458251953125,
"loss": 0.1624,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -0.9529097676277161,
"rewards/margins": 2.9751744270324707,
"rewards/real": 2.0222644805908203,
"step": 1230
},
{
"epoch": 0.79,
"learning_rate": 4.0864928909952607e-07,
"logits/generated": 6.4211554527282715,
"logits/real": 4.6509599685668945,
"logps/generated": -282.4293518066406,
"logps/real": -223.8668975830078,
"loss": 0.167,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.7745916247367859,
"rewards/margins": 2.5993974208831787,
"rewards/real": 1.8248056173324585,
"step": 1240
},
{
"epoch": 0.8,
"learning_rate": 4.074644549763033e-07,
"logits/generated": 6.483539581298828,
"logits/real": 4.578310489654541,
"logps/generated": -286.9674987792969,
"logps/real": -238.4805145263672,
"loss": 0.1563,
"rewards/accuracies": 0.9375,
"rewards/generated": -0.8147264719009399,
"rewards/margins": 2.793470859527588,
"rewards/real": 1.9787445068359375,
"step": 1250
},
{
"epoch": 0.81,
"learning_rate": 4.0627962085308056e-07,
"logits/generated": 6.479307651519775,
"logits/real": 4.371944427490234,
"logps/generated": -293.8260803222656,
"logps/real": -221.93624877929688,
"loss": 0.1552,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.0739367008209229,
"rewards/margins": 2.962772846221924,
"rewards/real": 1.888836145401001,
"step": 1260
},
{
"epoch": 0.81,
"learning_rate": 4.0509478672985783e-07,
"logits/generated": 6.399652004241943,
"logits/real": 5.044413089752197,
"logps/generated": -293.4299621582031,
"logps/real": -239.0977325439453,
"loss": 0.1582,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.0476984977722168,
"rewards/margins": 3.119697332382202,
"rewards/real": 2.0719990730285645,
"step": 1270
},
{
"epoch": 0.82,
"learning_rate": 4.0390995260663505e-07,
"logits/generated": 6.399343967437744,
"logits/real": 5.123462677001953,
"logps/generated": -286.2010498046875,
"logps/real": -238.5330352783203,
"loss": 0.1431,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.8854296803474426,
"rewards/margins": 2.840718984603882,
"rewards/real": 1.9552892446517944,
"step": 1280
},
{
"epoch": 0.83,
"learning_rate": 4.0272511848341227e-07,
"logits/generated": 6.5136399269104,
"logits/real": 5.123744487762451,
"logps/generated": -277.1810607910156,
"logps/real": -224.10684204101562,
"loss": 0.1746,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.9058412313461304,
"rewards/margins": 2.760077953338623,
"rewards/real": 1.8542366027832031,
"step": 1290
},
{
"epoch": 0.83,
"learning_rate": 4.0154028436018954e-07,
"logits/generated": 6.532641410827637,
"logits/real": 4.748272895812988,
"logps/generated": -294.2531433105469,
"logps/real": -227.92483520507812,
"loss": 0.1391,
"rewards/accuracies": 1.0,
"rewards/generated": -1.3061493635177612,
"rewards/margins": 3.458925724029541,
"rewards/real": 2.152775764465332,
"step": 1300
},
{
"epoch": 0.84,
"learning_rate": 4.003554502369668e-07,
"logits/generated": 6.615334987640381,
"logits/real": 4.404749393463135,
"logps/generated": -301.03900146484375,
"logps/real": -221.3147430419922,
"loss": 0.1335,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.0821572542190552,
"rewards/margins": 3.1977028846740723,
"rewards/real": 2.1155455112457275,
"step": 1310
},
{
"epoch": 0.84,
"learning_rate": 3.991706161137441e-07,
"logits/generated": 6.418117523193359,
"logits/real": 4.882086753845215,
"logps/generated": -294.2748718261719,
"logps/real": -232.966552734375,
"loss": 0.133,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.2364888191223145,
"rewards/margins": 3.057285785675049,
"rewards/real": 1.8207969665527344,
"step": 1320
},
{
"epoch": 0.85,
"learning_rate": 3.979857819905213e-07,
"logits/generated": 6.474581241607666,
"logits/real": 4.630919933319092,
"logps/generated": -291.40313720703125,
"logps/real": -231.16006469726562,
"loss": 0.0996,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.1898620128631592,
"rewards/margins": 3.224040985107422,
"rewards/real": 2.0341789722442627,
"step": 1330
},
{
"epoch": 0.86,
"learning_rate": 3.968009478672986e-07,
"logits/generated": 6.528960227966309,
"logits/real": 4.991686820983887,
"logps/generated": -280.37518310546875,
"logps/real": -234.42623901367188,
"loss": 0.1544,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.0884606838226318,
"rewards/margins": 3.3553130626678467,
"rewards/real": 2.2668521404266357,
"step": 1340
},
{
"epoch": 0.86,
"learning_rate": 3.9561611374407585e-07,
"logits/generated": 6.5070061683654785,
"logits/real": 4.700498104095459,
"logps/generated": -285.8040466308594,
"logps/real": -211.8716278076172,
"loss": 0.15,
"rewards/accuracies": 0.9125000238418579,
"rewards/generated": -1.127325177192688,
"rewards/margins": 3.0416786670684814,
"rewards/real": 1.914353370666504,
"step": 1350
},
{
"epoch": 0.87,
"learning_rate": 3.94431279620853e-07,
"logits/generated": 6.504839897155762,
"logits/real": 4.721653938293457,
"logps/generated": -281.8013610839844,
"logps/real": -217.22647094726562,
"loss": 0.1237,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.1646500825881958,
"rewards/margins": 3.471494674682617,
"rewards/real": 2.306844711303711,
"step": 1360
},
{
"epoch": 0.88,
"learning_rate": 3.932464454976303e-07,
"logits/generated": 6.476781368255615,
"logits/real": 4.780390739440918,
"logps/generated": -298.7755432128906,
"logps/real": -242.40310668945312,
"loss": 0.147,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.1053074598312378,
"rewards/margins": 3.288050889968872,
"rewards/real": 2.1827430725097656,
"step": 1370
},
{
"epoch": 0.88,
"learning_rate": 3.9206161137440757e-07,
"logits/generated": 6.5268354415893555,
"logits/real": 4.777252197265625,
"logps/generated": -302.7318115234375,
"logps/real": -233.9207305908203,
"loss": 0.1532,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.3840343952178955,
"rewards/margins": 3.5235512256622314,
"rewards/real": 2.139516592025757,
"step": 1380
},
{
"epoch": 0.89,
"learning_rate": 3.9087677725118484e-07,
"logits/generated": 6.526573181152344,
"logits/real": 4.747769355773926,
"logps/generated": -291.31085205078125,
"logps/real": -239.3638916015625,
"loss": 0.1283,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.26791250705719,
"rewards/margins": 3.175063133239746,
"rewards/real": 1.9071502685546875,
"step": 1390
},
{
"epoch": 0.9,
"learning_rate": 3.8969194312796206e-07,
"logits/generated": 6.308130264282227,
"logits/real": 4.654840469360352,
"logps/generated": -301.1564636230469,
"logps/real": -234.5809326171875,
"loss": 0.1158,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.2920730113983154,
"rewards/margins": 3.129149913787842,
"rewards/real": 1.8370771408081055,
"step": 1400
},
{
"epoch": 0.9,
"learning_rate": 3.8850710900473933e-07,
"logits/generated": 6.504507541656494,
"logits/real": 4.65291166305542,
"logps/generated": -305.6498107910156,
"logps/real": -233.3607635498047,
"loss": 0.1156,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.4324750900268555,
"rewards/margins": 3.3325772285461426,
"rewards/real": 1.9001020193099976,
"step": 1410
},
{
"epoch": 0.91,
"learning_rate": 3.873222748815166e-07,
"logits/generated": 6.564852237701416,
"logits/real": 4.4015302658081055,
"logps/generated": -288.9748840332031,
"logps/real": -222.2322235107422,
"loss": 0.1362,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.43608820438385,
"rewards/margins": 3.308760404586792,
"rewards/real": 1.8726723194122314,
"step": 1420
},
{
"epoch": 0.91,
"learning_rate": 3.8613744075829377e-07,
"logits/generated": 6.503379821777344,
"logits/real": 4.592303276062012,
"logps/generated": -289.198486328125,
"logps/real": -226.54165649414062,
"loss": 0.1142,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.3943302631378174,
"rewards/margins": 3.6817550659179688,
"rewards/real": 2.2874248027801514,
"step": 1430
},
{
"epoch": 0.92,
"learning_rate": 3.8495260663507104e-07,
"logits/generated": 6.395134925842285,
"logits/real": 4.751669883728027,
"logps/generated": -280.8811340332031,
"logps/real": -227.56826782226562,
"loss": 0.1107,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.2301466464996338,
"rewards/margins": 3.3704135417938232,
"rewards/real": 2.1402671337127686,
"step": 1440
},
{
"epoch": 0.93,
"learning_rate": 3.837677725118483e-07,
"logits/generated": 6.485627174377441,
"logits/real": 4.733918190002441,
"logps/generated": -306.0613708496094,
"logps/real": -236.187744140625,
"loss": 0.1028,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.2733852863311768,
"rewards/margins": 3.3273322582244873,
"rewards/real": 2.0539469718933105,
"step": 1450
},
{
"epoch": 0.93,
"learning_rate": 3.825829383886256e-07,
"logits/generated": 6.625826358795166,
"logits/real": 4.502466201782227,
"logps/generated": -302.5285339355469,
"logps/real": -191.37582397460938,
"loss": 0.126,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -1.4649912118911743,
"rewards/margins": 3.6144001483917236,
"rewards/real": 2.149409532546997,
"step": 1460
},
{
"epoch": 0.94,
"learning_rate": 3.813981042654028e-07,
"logits/generated": 6.492133140563965,
"logits/real": 4.759311199188232,
"logps/generated": -302.1866149902344,
"logps/real": -230.4091796875,
"loss": 0.1152,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.367823600769043,
"rewards/margins": 3.351057529449463,
"rewards/real": 1.9832338094711304,
"step": 1470
},
{
"epoch": 0.95,
"learning_rate": 3.802132701421801e-07,
"logits/generated": 6.473480224609375,
"logits/real": 4.9179205894470215,
"logps/generated": -292.11358642578125,
"logps/real": -235.0968017578125,
"loss": 0.1294,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.268405795097351,
"rewards/margins": 3.70072603225708,
"rewards/real": 2.4323201179504395,
"step": 1480
},
{
"epoch": 0.95,
"learning_rate": 3.7902843601895736e-07,
"logits/generated": 6.590761661529541,
"logits/real": 4.755660057067871,
"logps/generated": -283.9560852050781,
"logps/real": -239.81893920898438,
"loss": 0.1209,
"rewards/accuracies": 0.9375,
"rewards/generated": -1.3544660806655884,
"rewards/margins": 3.5569427013397217,
"rewards/real": 2.202476978302002,
"step": 1490
},
{
"epoch": 0.96,
"learning_rate": 3.778436018957346e-07,
"logits/generated": 6.543065071105957,
"logits/real": 4.660527229309082,
"logps/generated": -293.5489501953125,
"logps/real": -231.8582305908203,
"loss": 0.1171,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.3222100734710693,
"rewards/margins": 3.1835999488830566,
"rewards/real": 1.8613903522491455,
"step": 1500
},
{
"epoch": 0.97,
"learning_rate": 3.766587677725118e-07,
"logits/generated": 6.483323097229004,
"logits/real": 4.6694488525390625,
"logps/generated": -292.2344665527344,
"logps/real": -229.16067504882812,
"loss": 0.1245,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.535994052886963,
"rewards/margins": 3.5554702281951904,
"rewards/real": 2.0194761753082275,
"step": 1510
},
{
"epoch": 0.97,
"learning_rate": 3.7547393364928907e-07,
"logits/generated": 6.537093162536621,
"logits/real": 4.834275722503662,
"logps/generated": -283.90765380859375,
"logps/real": -221.65771484375,
"loss": 0.1043,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.336133599281311,
"rewards/margins": 3.4120376110076904,
"rewards/real": 2.075904369354248,
"step": 1520
},
{
"epoch": 0.98,
"learning_rate": 3.7428909952606634e-07,
"logits/generated": 6.584852695465088,
"logits/real": 4.697403907775879,
"logps/generated": -293.49371337890625,
"logps/real": -230.45291137695312,
"loss": 0.1223,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.4560847282409668,
"rewards/margins": 3.7806944847106934,
"rewards/real": 2.3246102333068848,
"step": 1530
},
{
"epoch": 0.99,
"learning_rate": 3.7310426540284356e-07,
"logits/generated": 6.4898881912231445,
"logits/real": 4.787100791931152,
"logps/generated": -289.96502685546875,
"logps/real": -235.83511352539062,
"loss": 0.1452,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -1.4755102396011353,
"rewards/margins": 3.804914951324463,
"rewards/real": 2.329404592514038,
"step": 1540
},
{
"epoch": 0.99,
"learning_rate": 3.7191943127962083e-07,
"logits/generated": 6.573674201965332,
"logits/real": 4.813099384307861,
"logps/generated": -295.05950927734375,
"logps/real": -239.4409942626953,
"loss": 0.1134,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.601508378982544,
"rewards/margins": 3.6218199729919434,
"rewards/real": 2.0203115940093994,
"step": 1550
},
{
"epoch": 1.0,
"learning_rate": 3.707345971563981e-07,
"logits/generated": 6.518675804138184,
"logits/real": 4.694587707519531,
"logps/generated": -288.26116943359375,
"logps/real": -216.22702026367188,
"loss": 0.1253,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.3888970613479614,
"rewards/margins": 3.527122974395752,
"rewards/real": 2.1382253170013428,
"step": 1560
},
{
"epoch": 1.0,
"learning_rate": 3.695497630331754e-07,
"logits/generated": 6.476728916168213,
"logits/real": 4.867232322692871,
"logps/generated": -290.8531494140625,
"logps/real": -223.3974151611328,
"loss": 0.1379,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.48335599899292,
"rewards/margins": 3.3799889087677,
"rewards/real": 1.8966329097747803,
"step": 1570
},
{
"epoch": 1.01,
"learning_rate": 3.683649289099526e-07,
"logits/generated": 6.590670585632324,
"logits/real": 4.584352493286133,
"logps/generated": -280.3003845214844,
"logps/real": -222.9787139892578,
"loss": 0.1055,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.518668532371521,
"rewards/margins": 3.503495454788208,
"rewards/real": 1.9848268032073975,
"step": 1580
},
{
"epoch": 1.02,
"learning_rate": 3.671800947867298e-07,
"logits/generated": 6.4740777015686035,
"logits/real": 4.666982173919678,
"logps/generated": -293.92950439453125,
"logps/real": -241.7804718017578,
"loss": 0.0979,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.4945917129516602,
"rewards/margins": 3.7217907905578613,
"rewards/real": 2.227198839187622,
"step": 1590
},
{
"epoch": 1.02,
"learning_rate": 3.659952606635071e-07,
"logits/generated": 6.5625457763671875,
"logits/real": 4.97821044921875,
"logps/generated": -287.527099609375,
"logps/real": -244.9476776123047,
"loss": 0.1185,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -1.5237135887145996,
"rewards/margins": 3.7011260986328125,
"rewards/real": 2.177412509918213,
"step": 1600
},
{
"epoch": 1.03,
"learning_rate": 3.648104265402843e-07,
"logits/generated": 6.531602382659912,
"logits/real": 5.296113014221191,
"logps/generated": -283.7915954589844,
"logps/real": -241.6884765625,
"loss": 0.1083,
"rewards/accuracies": 0.9375,
"rewards/generated": -1.42214834690094,
"rewards/margins": 3.4255123138427734,
"rewards/real": 2.003364086151123,
"step": 1610
},
{
"epoch": 1.04,
"learning_rate": 3.636255924170616e-07,
"logits/generated": 6.558846473693848,
"logits/real": 4.663855075836182,
"logps/generated": -295.4413757324219,
"logps/real": -231.3755340576172,
"loss": 0.1392,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -1.4816911220550537,
"rewards/margins": 3.7079098224639893,
"rewards/real": 2.2262187004089355,
"step": 1620
},
{
"epoch": 1.04,
"learning_rate": 3.6244075829383886e-07,
"logits/generated": 6.471614837646484,
"logits/real": 4.710072994232178,
"logps/generated": -288.50982666015625,
"logps/real": -242.4681854248047,
"loss": 0.133,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -1.2834060192108154,
"rewards/margins": 3.245429277420044,
"rewards/real": 1.962023377418518,
"step": 1630
},
{
"epoch": 1.05,
"learning_rate": 3.6125592417061613e-07,
"logits/generated": 6.597050666809082,
"logits/real": 4.699416160583496,
"logps/generated": -309.9823303222656,
"logps/real": -241.0970001220703,
"loss": 0.1212,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.800374984741211,
"rewards/margins": 3.8445522785186768,
"rewards/real": 2.044177532196045,
"step": 1640
},
{
"epoch": 1.06,
"learning_rate": 3.6007109004739335e-07,
"logits/generated": 6.396651744842529,
"logits/real": 4.873335838317871,
"logps/generated": -302.5182800292969,
"logps/real": -225.38162231445312,
"loss": 0.096,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.7537730932235718,
"rewards/margins": 3.5806782245635986,
"rewards/real": 1.8269050121307373,
"step": 1650
},
{
"epoch": 1.06,
"learning_rate": 3.588862559241706e-07,
"logits/generated": 6.512624263763428,
"logits/real": 4.873417854309082,
"logps/generated": -310.1457824707031,
"logps/real": -221.52127075195312,
"loss": 0.1038,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -1.995270013809204,
"rewards/margins": 3.832683563232422,
"rewards/real": 1.8374135494232178,
"step": 1660
},
{
"epoch": 1.07,
"learning_rate": 3.5770142180094784e-07,
"logits/generated": 6.3861165046691895,
"logits/real": 4.902550220489502,
"logps/generated": -309.2914123535156,
"logps/real": -254.0144805908203,
"loss": 0.0882,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.0285847187042236,
"rewards/margins": 4.352484703063965,
"rewards/real": 2.323899745941162,
"step": 1670
},
{
"epoch": 1.07,
"learning_rate": 3.5651658767772506e-07,
"logits/generated": 6.3969197273254395,
"logits/real": 4.497953414916992,
"logps/generated": -282.2160949707031,
"logps/real": -221.9111328125,
"loss": 0.1083,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -1.500212550163269,
"rewards/margins": 3.4251670837402344,
"rewards/real": 1.9249544143676758,
"step": 1680
},
{
"epoch": 1.08,
"learning_rate": 3.5533175355450234e-07,
"logits/generated": 6.489283084869385,
"logits/real": 5.103555202484131,
"logps/generated": -296.80084228515625,
"logps/real": -254.9130859375,
"loss": 0.1188,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.7659108638763428,
"rewards/margins": 3.7144954204559326,
"rewards/real": 1.948584794998169,
"step": 1690
},
{
"epoch": 1.09,
"learning_rate": 3.541469194312796e-07,
"logits/generated": 6.5424089431762695,
"logits/real": 4.5776543617248535,
"logps/generated": -306.41778564453125,
"logps/real": -229.9923553466797,
"loss": 0.0874,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.5843690633773804,
"rewards/margins": 3.7243666648864746,
"rewards/real": 2.139997959136963,
"step": 1700
},
{
"epoch": 1.09,
"learning_rate": 3.529620853080569e-07,
"logits/generated": 6.53662109375,
"logits/real": 4.515005588531494,
"logps/generated": -281.1587829589844,
"logps/real": -198.87599182128906,
"loss": 0.1229,
"rewards/accuracies": 0.9375,
"rewards/generated": -1.793796181678772,
"rewards/margins": 3.6370015144348145,
"rewards/real": 1.8432050943374634,
"step": 1710
},
{
"epoch": 1.1,
"learning_rate": 3.517772511848341e-07,
"logits/generated": 6.402149200439453,
"logits/real": 5.097185134887695,
"logps/generated": -300.73248291015625,
"logps/real": -269.5830078125,
"loss": 0.0861,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.8012573719024658,
"rewards/margins": 4.034179210662842,
"rewards/real": 2.232922077178955,
"step": 1720
},
{
"epoch": 1.11,
"learning_rate": 3.505924170616114e-07,
"logits/generated": 6.531800746917725,
"logits/real": 4.443795204162598,
"logps/generated": -297.3340759277344,
"logps/real": -225.61459350585938,
"loss": 0.0949,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.9287163019180298,
"rewards/margins": 4.080899715423584,
"rewards/real": 2.1521835327148438,
"step": 1730
},
{
"epoch": 1.11,
"learning_rate": 3.4940758293838865e-07,
"logits/generated": 6.565011501312256,
"logits/real": 4.78118371963501,
"logps/generated": -300.0439453125,
"logps/real": -232.56979370117188,
"loss": 0.0998,
"rewards/accuracies": 1.0,
"rewards/generated": -1.7687839269638062,
"rewards/margins": 3.690317153930664,
"rewards/real": 1.921533226966858,
"step": 1740
},
{
"epoch": 1.12,
"learning_rate": 3.482227488151658e-07,
"logits/generated": 6.521812438964844,
"logits/real": 4.503942489624023,
"logps/generated": -304.3191223144531,
"logps/real": -216.3659210205078,
"loss": 0.105,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -1.9444500207901,
"rewards/margins": 4.122984886169434,
"rewards/real": 2.178535223007202,
"step": 1750
},
{
"epoch": 1.13,
"learning_rate": 3.470379146919431e-07,
"logits/generated": 6.601067543029785,
"logits/real": 4.580229759216309,
"logps/generated": -304.520751953125,
"logps/real": -218.858154296875,
"loss": 0.1114,
"rewards/accuracies": 0.9375,
"rewards/generated": -1.790876030921936,
"rewards/margins": 3.8865456581115723,
"rewards/real": 2.0956692695617676,
"step": 1760
},
{
"epoch": 1.13,
"learning_rate": 3.4585308056872036e-07,
"logits/generated": 6.4673662185668945,
"logits/real": 4.512519836425781,
"logps/generated": -309.33563232421875,
"logps/real": -237.4860076904297,
"loss": 0.0852,
"rewards/accuracies": 1.0,
"rewards/generated": -2.074862003326416,
"rewards/margins": 4.4387288093566895,
"rewards/real": 2.3638668060302734,
"step": 1770
},
{
"epoch": 1.14,
"learning_rate": 3.4466824644549763e-07,
"logits/generated": 6.52877140045166,
"logits/real": 4.660586833953857,
"logps/generated": -292.84735107421875,
"logps/real": -235.4850616455078,
"loss": 0.0748,
"rewards/accuracies": 1.0,
"rewards/generated": -1.910387635231018,
"rewards/margins": 4.243655204772949,
"rewards/real": 2.3332676887512207,
"step": 1780
},
{
"epoch": 1.15,
"learning_rate": 3.4348341232227485e-07,
"logits/generated": 6.508902549743652,
"logits/real": 5.103868007659912,
"logps/generated": -309.3510437011719,
"logps/real": -261.96673583984375,
"loss": 0.1137,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -1.9301677942276,
"rewards/margins": 3.7662911415100098,
"rewards/real": 1.8361234664916992,
"step": 1790
},
{
"epoch": 1.15,
"learning_rate": 3.422985781990521e-07,
"logits/generated": 6.578322410583496,
"logits/real": 4.619819641113281,
"logps/generated": -309.3769836425781,
"logps/real": -230.23489379882812,
"loss": 0.0928,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.1213955879211426,
"rewards/margins": 4.048805236816406,
"rewards/real": 1.9274095296859741,
"step": 1800
},
{
"epoch": 1.16,
"learning_rate": 3.411137440758294e-07,
"logits/generated": 6.638535499572754,
"logits/real": 4.866148948669434,
"logps/generated": -297.40826416015625,
"logps/real": -226.35610961914062,
"loss": 0.0875,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.079007625579834,
"rewards/margins": 4.372213840484619,
"rewards/real": 2.293206214904785,
"step": 1810
},
{
"epoch": 1.16,
"learning_rate": 3.3992890995260667e-07,
"logits/generated": 6.590002536773682,
"logits/real": 4.547215461730957,
"logps/generated": -288.596435546875,
"logps/real": -198.56236267089844,
"loss": 0.0912,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.8445419073104858,
"rewards/margins": 3.7398104667663574,
"rewards/real": 1.8952690362930298,
"step": 1820
},
{
"epoch": 1.17,
"learning_rate": 3.3874407582938384e-07,
"logits/generated": 6.5464019775390625,
"logits/real": 4.549433708190918,
"logps/generated": -316.94287109375,
"logps/real": -230.5665283203125,
"loss": 0.0736,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.1460461616516113,
"rewards/margins": 4.4059224128723145,
"rewards/real": 2.259875774383545,
"step": 1830
},
{
"epoch": 1.18,
"learning_rate": 3.375592417061611e-07,
"logits/generated": 6.43328332901001,
"logits/real": 4.823711395263672,
"logps/generated": -301.39202880859375,
"logps/real": -236.8686065673828,
"loss": 0.0918,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.8650691509246826,
"rewards/margins": 4.1491899490356445,
"rewards/real": 2.284120559692383,
"step": 1840
},
{
"epoch": 1.18,
"learning_rate": 3.363744075829384e-07,
"logits/generated": 6.538866996765137,
"logits/real": 4.6786723136901855,
"logps/generated": -291.01483154296875,
"logps/real": -196.77285766601562,
"loss": 0.0739,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.9935919046401978,
"rewards/margins": 4.511776447296143,
"rewards/real": 2.518184185028076,
"step": 1850
},
{
"epoch": 1.19,
"learning_rate": 3.351895734597156e-07,
"logits/generated": 6.427986145019531,
"logits/real": 4.777734279632568,
"logps/generated": -291.7196350097656,
"logps/real": -219.09597778320312,
"loss": 0.1,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.9151489734649658,
"rewards/margins": 3.943358898162842,
"rewards/real": 2.028210163116455,
"step": 1860
},
{
"epoch": 1.2,
"learning_rate": 3.340047393364929e-07,
"logits/generated": 6.557374477386475,
"logits/real": 4.517666339874268,
"logps/generated": -293.89178466796875,
"logps/real": -231.68222045898438,
"loss": 0.0845,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.155890703201294,
"rewards/margins": 4.3398518562316895,
"rewards/real": 2.1839611530303955,
"step": 1870
},
{
"epoch": 1.2,
"learning_rate": 3.3281990521327015e-07,
"logits/generated": 6.494599342346191,
"logits/real": 4.4522833824157715,
"logps/generated": -309.76556396484375,
"logps/real": -219.76806640625,
"loss": 0.0856,
"rewards/accuracies": 1.0,
"rewards/generated": -2.3567099571228027,
"rewards/margins": 4.560822486877441,
"rewards/real": 2.2041122913360596,
"step": 1880
},
{
"epoch": 1.21,
"learning_rate": 3.316350710900474e-07,
"logits/generated": 6.508890628814697,
"logits/real": 4.725305080413818,
"logps/generated": -294.90533447265625,
"logps/real": -225.0491180419922,
"loss": 0.1116,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -1.9600846767425537,
"rewards/margins": 3.896559476852417,
"rewards/real": 1.9364748001098633,
"step": 1890
},
{
"epoch": 1.22,
"learning_rate": 3.304502369668246e-07,
"logits/generated": 6.483962059020996,
"logits/real": 4.7496185302734375,
"logps/generated": -294.6922607421875,
"logps/real": -234.5235137939453,
"loss": 0.0806,
"rewards/accuracies": 1.0,
"rewards/generated": -2.0787155628204346,
"rewards/margins": 4.212123394012451,
"rewards/real": 2.1334080696105957,
"step": 1900
},
{
"epoch": 1.22,
"learning_rate": 3.2926540284360186e-07,
"logits/generated": 6.553183078765869,
"logits/real": 4.5973711013793945,
"logps/generated": -290.69122314453125,
"logps/real": -224.2595977783203,
"loss": 0.0786,
"rewards/accuracies": 1.0,
"rewards/generated": -2.1069176197052,
"rewards/margins": 4.262394905090332,
"rewards/real": 2.155477523803711,
"step": 1910
},
{
"epoch": 1.23,
"learning_rate": 3.2808056872037913e-07,
"logits/generated": 6.475919246673584,
"logits/real": 4.781458854675293,
"logps/generated": -293.8143615722656,
"logps/real": -242.02157592773438,
"loss": 0.0815,
"rewards/accuracies": 1.0,
"rewards/generated": -2.0358715057373047,
"rewards/margins": 4.052926063537598,
"rewards/real": 2.017054796218872,
"step": 1920
},
{
"epoch": 1.23,
"learning_rate": 3.2689573459715635e-07,
"logits/generated": 6.587837219238281,
"logits/real": 4.240110874176025,
"logps/generated": -300.80084228515625,
"logps/real": -197.26913452148438,
"loss": 0.0862,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.1164047718048096,
"rewards/margins": 4.440495014190674,
"rewards/real": 2.3240902423858643,
"step": 1930
},
{
"epoch": 1.24,
"learning_rate": 3.2571090047393363e-07,
"logits/generated": 6.373067855834961,
"logits/real": 4.968833923339844,
"logps/generated": -313.5643005371094,
"logps/real": -238.37448120117188,
"loss": 0.0616,
"rewards/accuracies": 1.0,
"rewards/generated": -2.1593971252441406,
"rewards/margins": 4.199219703674316,
"rewards/real": 2.0398221015930176,
"step": 1940
},
{
"epoch": 1.25,
"learning_rate": 3.245260663507109e-07,
"logits/generated": 6.488961219787598,
"logits/real": 4.785304069519043,
"logps/generated": -305.1136474609375,
"logps/real": -226.7467498779297,
"loss": 0.0907,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.0184876918792725,
"rewards/margins": 4.260189056396484,
"rewards/real": 2.241701126098633,
"step": 1950
},
{
"epoch": 1.25,
"learning_rate": 3.2334123222748817e-07,
"logits/generated": 6.571511745452881,
"logits/real": 4.719809055328369,
"logps/generated": -288.8949279785156,
"logps/real": -227.11788940429688,
"loss": 0.0826,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.8733266592025757,
"rewards/margins": 4.297513484954834,
"rewards/real": 2.424187183380127,
"step": 1960
},
{
"epoch": 1.26,
"learning_rate": 3.221563981042654e-07,
"logits/generated": 6.46596622467041,
"logits/real": 5.228185653686523,
"logps/generated": -317.7358703613281,
"logps/real": -263.435791015625,
"loss": 0.0856,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.4127206802368164,
"rewards/margins": 4.111409664154053,
"rewards/real": 1.6986888647079468,
"step": 1970
},
{
"epoch": 1.27,
"learning_rate": 3.209715639810426e-07,
"logits/generated": 6.481564998626709,
"logits/real": 4.614343166351318,
"logps/generated": -310.3409729003906,
"logps/real": -232.2714385986328,
"loss": 0.0894,
"rewards/accuracies": 0.9375,
"rewards/generated": -2.1927568912506104,
"rewards/margins": 4.123000144958496,
"rewards/real": 1.930242896080017,
"step": 1980
},
{
"epoch": 1.27,
"learning_rate": 3.197867298578199e-07,
"logits/generated": 6.458526611328125,
"logits/real": 4.698050498962402,
"logps/generated": -300.57391357421875,
"logps/real": -227.27169799804688,
"loss": 0.0675,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.231247901916504,
"rewards/margins": 4.3048787117004395,
"rewards/real": 2.0736308097839355,
"step": 1990
},
{
"epoch": 1.28,
"learning_rate": 3.186018957345971e-07,
"logits/generated": 6.454297065734863,
"logits/real": 4.6443586349487305,
"logps/generated": -299.6277160644531,
"logps/real": -228.95370483398438,
"loss": 0.0791,
"rewards/accuracies": 1.0,
"rewards/generated": -2.205613613128662,
"rewards/margins": 4.489793300628662,
"rewards/real": 2.2841796875,
"step": 2000
},
{
"epoch": 1.29,
"learning_rate": 3.174170616113744e-07,
"logits/generated": 6.567930698394775,
"logits/real": 4.493962287902832,
"logps/generated": -296.0806579589844,
"logps/real": -206.06979370117188,
"loss": 0.0601,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.3037962913513184,
"rewards/margins": 4.68963623046875,
"rewards/real": 2.3858392238616943,
"step": 2010
},
{
"epoch": 1.29,
"learning_rate": 3.1623222748815165e-07,
"logits/generated": 6.372540473937988,
"logits/real": 4.720137596130371,
"logps/generated": -283.2089538574219,
"logps/real": -228.1040802001953,
"loss": 0.0764,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.127713918685913,
"rewards/margins": 4.436669826507568,
"rewards/real": 2.3089561462402344,
"step": 2020
},
{
"epoch": 1.3,
"learning_rate": 3.150473933649289e-07,
"logits/generated": 6.5518317222595215,
"logits/real": 4.7907562255859375,
"logps/generated": -305.01019287109375,
"logps/real": -233.3303985595703,
"loss": 0.0768,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.040151834487915,
"rewards/margins": 4.2342705726623535,
"rewards/real": 2.1941189765930176,
"step": 2030
},
{
"epoch": 1.31,
"learning_rate": 3.1386255924170614e-07,
"logits/generated": 6.603424072265625,
"logits/real": 4.3683857917785645,
"logps/generated": -288.2603759765625,
"logps/real": -198.78309631347656,
"loss": 0.0797,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.926896333694458,
"rewards/margins": 4.150208950042725,
"rewards/real": 2.2233126163482666,
"step": 2040
},
{
"epoch": 1.31,
"learning_rate": 3.126777251184834e-07,
"logits/generated": 6.489920616149902,
"logits/real": 4.773646831512451,
"logps/generated": -312.36236572265625,
"logps/real": -234.63076782226562,
"loss": 0.062,
"rewards/accuracies": 1.0,
"rewards/generated": -2.2082715034484863,
"rewards/margins": 4.475545883178711,
"rewards/real": 2.2672739028930664,
"step": 2050
},
{
"epoch": 1.32,
"learning_rate": 3.1149289099526064e-07,
"logits/generated": 6.615997314453125,
"logits/real": 4.535744667053223,
"logps/generated": -283.09326171875,
"logps/real": -217.5418701171875,
"loss": 0.0765,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -2.2303497791290283,
"rewards/margins": 4.6097259521484375,
"rewards/real": 2.3793764114379883,
"step": 2060
},
{
"epoch": 1.32,
"learning_rate": 3.103080568720379e-07,
"logits/generated": 6.409584045410156,
"logits/real": 4.447390556335449,
"logps/generated": -290.5548095703125,
"logps/real": -227.99038696289062,
"loss": 0.0963,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.0898966789245605,
"rewards/margins": 4.26753568649292,
"rewards/real": 2.1776394844055176,
"step": 2070
},
{
"epoch": 1.33,
"learning_rate": 3.0912322274881513e-07,
"logits/generated": 6.556498050689697,
"logits/real": 4.974642753601074,
"logps/generated": -300.2599182128906,
"logps/real": -241.9363555908203,
"loss": 0.079,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.5023233890533447,
"rewards/margins": 4.799649238586426,
"rewards/real": 2.297325372695923,
"step": 2080
},
{
"epoch": 1.34,
"learning_rate": 3.079383886255924e-07,
"logits/generated": 6.597992897033691,
"logits/real": 4.567930698394775,
"logps/generated": -311.3286437988281,
"logps/real": -219.1073455810547,
"loss": 0.06,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.2045724391937256,
"rewards/margins": 4.53138542175293,
"rewards/real": 2.326812267303467,
"step": 2090
},
{
"epoch": 1.34,
"learning_rate": 3.067535545023697e-07,
"logits/generated": 6.559409141540527,
"logits/real": 4.554577827453613,
"logps/generated": -303.6340637207031,
"logps/real": -208.86483764648438,
"loss": 0.0687,
"rewards/accuracies": 1.0,
"rewards/generated": -2.4833426475524902,
"rewards/margins": 4.425951957702637,
"rewards/real": 1.9426090717315674,
"step": 2100
},
{
"epoch": 1.35,
"learning_rate": 3.055687203791469e-07,
"logits/generated": 6.533651828765869,
"logits/real": 4.764289379119873,
"logps/generated": -295.6280822753906,
"logps/real": -224.78646850585938,
"loss": 0.0814,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.3707690238952637,
"rewards/margins": 4.8221025466918945,
"rewards/real": 2.451333522796631,
"step": 2110
},
{
"epoch": 1.36,
"learning_rate": 3.0438388625592417e-07,
"logits/generated": 6.513457298278809,
"logits/real": 4.793578147888184,
"logps/generated": -311.8963928222656,
"logps/real": -224.7337646484375,
"loss": 0.0821,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.5289576053619385,
"rewards/margins": 4.804759502410889,
"rewards/real": 2.2758023738861084,
"step": 2120
},
{
"epoch": 1.36,
"learning_rate": 3.0319905213270144e-07,
"logits/generated": 6.621163368225098,
"logits/real": 4.561090469360352,
"logps/generated": -303.7142028808594,
"logps/real": -222.3753662109375,
"loss": 0.0836,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.326003074645996,
"rewards/margins": 4.459982872009277,
"rewards/real": 2.1339797973632812,
"step": 2130
},
{
"epoch": 1.37,
"learning_rate": 3.0201421800947866e-07,
"logits/generated": 6.580770969390869,
"logits/real": 4.773768901824951,
"logps/generated": -311.6327209472656,
"logps/real": -249.0811309814453,
"loss": 0.0835,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.3817009925842285,
"rewards/margins": 4.315882682800293,
"rewards/real": 1.934181809425354,
"step": 2140
},
{
"epoch": 1.38,
"learning_rate": 3.008293838862559e-07,
"logits/generated": 6.521528720855713,
"logits/real": 4.7063798904418945,
"logps/generated": -294.9180908203125,
"logps/real": -223.62643432617188,
"loss": 0.1009,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.208840847015381,
"rewards/margins": 4.619427680969238,
"rewards/real": 2.4105873107910156,
"step": 2150
},
{
"epoch": 1.38,
"learning_rate": 2.9964454976303315e-07,
"logits/generated": 6.510763645172119,
"logits/real": 4.770654201507568,
"logps/generated": -306.3185729980469,
"logps/real": -223.23654174804688,
"loss": 0.057,
"rewards/accuracies": 1.0,
"rewards/generated": -2.2273476123809814,
"rewards/margins": 4.425220966339111,
"rewards/real": 2.197873592376709,
"step": 2160
},
{
"epoch": 1.39,
"learning_rate": 2.984597156398104e-07,
"logits/generated": 6.569235801696777,
"logits/real": 4.330723762512207,
"logps/generated": -300.669677734375,
"logps/real": -213.9007568359375,
"loss": 0.0789,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.5640532970428467,
"rewards/margins": 4.792849540710449,
"rewards/real": 2.2287967205047607,
"step": 2170
},
{
"epoch": 1.39,
"learning_rate": 2.9727488151658765e-07,
"logits/generated": 6.663917541503906,
"logits/real": 4.72244119644165,
"logps/generated": -314.7903747558594,
"logps/real": -252.47933959960938,
"loss": 0.0659,
"rewards/accuracies": 1.0,
"rewards/generated": -2.60965895652771,
"rewards/margins": 4.690103054046631,
"rewards/real": 2.080443859100342,
"step": 2180
},
{
"epoch": 1.4,
"learning_rate": 2.960900473933649e-07,
"logits/generated": 6.465506553649902,
"logits/real": 5.010118007659912,
"logps/generated": -297.9461975097656,
"logps/real": -224.8966522216797,
"loss": 0.0514,
"rewards/accuracies": 1.0,
"rewards/generated": -2.0201261043548584,
"rewards/margins": 4.045393943786621,
"rewards/real": 2.025268077850342,
"step": 2190
},
{
"epoch": 1.41,
"learning_rate": 2.949052132701422e-07,
"logits/generated": 6.5695037841796875,
"logits/real": 4.864360332489014,
"logps/generated": -300.3662414550781,
"logps/real": -243.6785888671875,
"loss": 0.0647,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.213184118270874,
"rewards/margins": 4.579488754272461,
"rewards/real": 2.366304397583008,
"step": 2200
},
{
"epoch": 1.41,
"learning_rate": 2.9372037914691946e-07,
"logits/generated": 6.518437385559082,
"logits/real": 4.748892784118652,
"logps/generated": -309.49658203125,
"logps/real": -216.10751342773438,
"loss": 0.0583,
"rewards/accuracies": 1.0,
"rewards/generated": -2.4229207038879395,
"rewards/margins": 4.442441463470459,
"rewards/real": 2.0195205211639404,
"step": 2210
},
{
"epoch": 1.42,
"learning_rate": 2.9253554502369663e-07,
"logits/generated": 6.526637077331543,
"logits/real": 4.67498779296875,
"logps/generated": -313.6875,
"logps/real": -231.32199096679688,
"loss": 0.0762,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.6978516578674316,
"rewards/margins": 4.7451090812683105,
"rewards/real": 2.0472571849823,
"step": 2220
},
{
"epoch": 1.43,
"learning_rate": 2.913507109004739e-07,
"logits/generated": 6.5197906494140625,
"logits/real": 4.786774635314941,
"logps/generated": -300.3356628417969,
"logps/real": -238.114501953125,
"loss": 0.07,
"rewards/accuracies": 1.0,
"rewards/generated": -2.6963155269622803,
"rewards/margins": 5.061102867126465,
"rewards/real": 2.3647871017456055,
"step": 2230
},
{
"epoch": 1.43,
"learning_rate": 2.901658767772512e-07,
"logits/generated": 6.281010627746582,
"logits/real": 4.850646495819092,
"logps/generated": -311.21044921875,
"logps/real": -224.34005737304688,
"loss": 0.0678,
"rewards/accuracies": 1.0,
"rewards/generated": -2.614072561264038,
"rewards/margins": 4.778448104858398,
"rewards/real": 2.1643755435943604,
"step": 2240
},
{
"epoch": 1.44,
"learning_rate": 2.889810426540284e-07,
"logits/generated": 6.2719526290893555,
"logits/real": 4.559032440185547,
"logps/generated": -300.2083435058594,
"logps/real": -220.322998046875,
"loss": 0.0626,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.7301433086395264,
"rewards/margins": 5.041817665100098,
"rewards/real": 2.311674118041992,
"step": 2250
},
{
"epoch": 1.45,
"learning_rate": 2.8779620853080567e-07,
"logits/generated": 6.567207336425781,
"logits/real": 4.634784698486328,
"logps/generated": -306.6617736816406,
"logps/real": -223.53744506835938,
"loss": 0.0634,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.5135562419891357,
"rewards/margins": 4.631071090698242,
"rewards/real": 2.1175150871276855,
"step": 2260
},
{
"epoch": 1.45,
"learning_rate": 2.8661137440758294e-07,
"logits/generated": 6.572225093841553,
"logits/real": 5.128222465515137,
"logps/generated": -305.1805419921875,
"logps/real": -244.2073974609375,
"loss": 0.0913,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -2.5788116455078125,
"rewards/margins": 4.599055767059326,
"rewards/real": 2.0202441215515137,
"step": 2270
},
{
"epoch": 1.46,
"learning_rate": 2.854265402843602e-07,
"logits/generated": 6.486606597900391,
"logits/real": 4.381880283355713,
"logps/generated": -299.26226806640625,
"logps/real": -204.21128845214844,
"loss": 0.0607,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.690892219543457,
"rewards/margins": 4.859349727630615,
"rewards/real": 2.168457508087158,
"step": 2280
},
{
"epoch": 1.47,
"learning_rate": 2.842417061611374e-07,
"logits/generated": 6.467171669006348,
"logits/real": 4.881519317626953,
"logps/generated": -316.6208801269531,
"logps/real": -249.66964721679688,
"loss": 0.0561,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.9214892387390137,
"rewards/margins": 4.928795337677002,
"rewards/real": 2.0073063373565674,
"step": 2290
},
{
"epoch": 1.47,
"learning_rate": 2.8305687203791465e-07,
"logits/generated": 6.373992443084717,
"logits/real": 4.881070613861084,
"logps/generated": -304.5431823730469,
"logps/real": -238.75918579101562,
"loss": 0.0637,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.7934460639953613,
"rewards/margins": 5.073416709899902,
"rewards/real": 2.279970645904541,
"step": 2300
},
{
"epoch": 1.48,
"learning_rate": 2.8187203791469193e-07,
"logits/generated": 6.459514617919922,
"logits/real": 4.823994159698486,
"logps/generated": -297.67156982421875,
"logps/real": -244.6254425048828,
"loss": 0.07,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.617419958114624,
"rewards/margins": 4.747994422912598,
"rewards/real": 2.1305739879608154,
"step": 2310
},
{
"epoch": 1.48,
"learning_rate": 2.806872037914692e-07,
"logits/generated": 6.545841217041016,
"logits/real": 4.525175094604492,
"logps/generated": -300.8380126953125,
"logps/real": -230.91641235351562,
"loss": 0.0413,
"rewards/accuracies": 1.0,
"rewards/generated": -2.621128797531128,
"rewards/margins": 4.8851799964904785,
"rewards/real": 2.2640514373779297,
"step": 2320
},
{
"epoch": 1.49,
"learning_rate": 2.795023696682464e-07,
"logits/generated": 6.552148342132568,
"logits/real": 4.585801601409912,
"logps/generated": -291.0857238769531,
"logps/real": -203.3738250732422,
"loss": 0.0633,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.501784324645996,
"rewards/margins": 4.606094837188721,
"rewards/real": 2.1043105125427246,
"step": 2330
},
{
"epoch": 1.5,
"learning_rate": 2.783175355450237e-07,
"logits/generated": 6.5855712890625,
"logits/real": 4.8127546310424805,
"logps/generated": -304.4456787109375,
"logps/real": -241.84164428710938,
"loss": 0.0662,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.4264376163482666,
"rewards/margins": 4.380554676055908,
"rewards/real": 1.9541168212890625,
"step": 2340
},
{
"epoch": 1.5,
"learning_rate": 2.7713270142180097e-07,
"logits/generated": 6.61553955078125,
"logits/real": 4.842226982116699,
"logps/generated": -304.24908447265625,
"logps/real": -228.1985321044922,
"loss": 0.0723,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.668332815170288,
"rewards/margins": 4.606504917144775,
"rewards/real": 1.9381721019744873,
"step": 2350
},
{
"epoch": 1.51,
"learning_rate": 2.759478672985782e-07,
"logits/generated": 6.651033878326416,
"logits/real": 5.216360569000244,
"logps/generated": -299.9844665527344,
"logps/real": -245.30838012695312,
"loss": 0.058,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.7325878143310547,
"rewards/margins": 4.621773719787598,
"rewards/real": 1.8891855478286743,
"step": 2360
},
{
"epoch": 1.52,
"learning_rate": 2.747630331753554e-07,
"logits/generated": 6.4999237060546875,
"logits/real": 4.944417476654053,
"logps/generated": -296.3262023925781,
"logps/real": -230.99472045898438,
"loss": 0.0709,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.5117669105529785,
"rewards/margins": 4.533594131469727,
"rewards/real": 2.021827220916748,
"step": 2370
},
{
"epoch": 1.52,
"learning_rate": 2.735781990521327e-07,
"logits/generated": 6.586300849914551,
"logits/real": 4.376750946044922,
"logps/generated": -308.04730224609375,
"logps/real": -199.63783264160156,
"loss": 0.0526,
"rewards/accuracies": 1.0,
"rewards/generated": -2.739675521850586,
"rewards/margins": 5.009511947631836,
"rewards/real": 2.26983642578125,
"step": 2380
},
{
"epoch": 1.53,
"learning_rate": 2.7239336492890995e-07,
"logits/generated": 6.62445592880249,
"logits/real": 4.475614547729492,
"logps/generated": -299.97686767578125,
"logps/real": -205.2351837158203,
"loss": 0.0631,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.6585028171539307,
"rewards/margins": 4.911370277404785,
"rewards/real": 2.2528672218322754,
"step": 2390
},
{
"epoch": 1.54,
"learning_rate": 2.7120853080568717e-07,
"logits/generated": 6.5361738204956055,
"logits/real": 4.921416282653809,
"logps/generated": -320.28619384765625,
"logps/real": -235.4938507080078,
"loss": 0.0686,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.5308923721313477,
"rewards/margins": 4.577965259552002,
"rewards/real": 2.047072649002075,
"step": 2400
},
{
"epoch": 1.54,
"learning_rate": 2.7002369668246444e-07,
"logits/generated": 6.583050727844238,
"logits/real": 4.50087833404541,
"logps/generated": -305.26544189453125,
"logps/real": -205.6936492919922,
"loss": 0.0646,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.650360584259033,
"rewards/margins": 4.845761299133301,
"rewards/real": 2.195401191711426,
"step": 2410
},
{
"epoch": 1.55,
"learning_rate": 2.688388625592417e-07,
"logits/generated": 6.528054714202881,
"logits/real": 4.9675092697143555,
"logps/generated": -287.01385498046875,
"logps/real": -239.06137084960938,
"loss": 0.0695,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.433396577835083,
"rewards/margins": 4.63083553314209,
"rewards/real": 2.1974387168884277,
"step": 2420
},
{
"epoch": 1.55,
"learning_rate": 2.6765402843601894e-07,
"logits/generated": 6.572503089904785,
"logits/real": 4.664327621459961,
"logps/generated": -299.9050598144531,
"logps/real": -226.3813934326172,
"loss": 0.0589,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.615846633911133,
"rewards/margins": 4.903604030609131,
"rewards/real": 2.287757396697998,
"step": 2430
},
{
"epoch": 1.56,
"learning_rate": 2.664691943127962e-07,
"logits/generated": 6.500199794769287,
"logits/real": 4.409898281097412,
"logps/generated": -291.48956298828125,
"logps/real": -221.5597686767578,
"loss": 0.0677,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.5167105197906494,
"rewards/margins": 4.5962815284729,
"rewards/real": 2.079570770263672,
"step": 2440
},
{
"epoch": 1.57,
"learning_rate": 2.6528436018957343e-07,
"logits/generated": 6.451045036315918,
"logits/real": 4.783955097198486,
"logps/generated": -297.8843688964844,
"logps/real": -234.1428985595703,
"loss": 0.0697,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.9517464637756348,
"rewards/margins": 5.131245136260986,
"rewards/real": 2.1794984340667725,
"step": 2450
},
{
"epoch": 1.57,
"learning_rate": 2.640995260663507e-07,
"logits/generated": 6.4251837730407715,
"logits/real": 4.974338054656982,
"logps/generated": -286.2950134277344,
"logps/real": -229.6946258544922,
"loss": 0.0595,
"rewards/accuracies": 1.0,
"rewards/generated": -2.7505643367767334,
"rewards/margins": 4.802645683288574,
"rewards/real": 2.05208158493042,
"step": 2460
},
{
"epoch": 1.58,
"learning_rate": 2.629146919431279e-07,
"logits/generated": 6.499301910400391,
"logits/real": 4.888270378112793,
"logps/generated": -313.7422180175781,
"logps/real": -248.39376831054688,
"loss": 0.0575,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.546126127243042,
"rewards/margins": 4.463229179382324,
"rewards/real": 1.9171028137207031,
"step": 2470
},
{
"epoch": 1.59,
"learning_rate": 2.617298578199052e-07,
"logits/generated": 6.561995506286621,
"logits/real": 4.65811014175415,
"logps/generated": -311.327392578125,
"logps/real": -216.3298797607422,
"loss": 0.057,
"rewards/accuracies": 1.0,
"rewards/generated": -2.7726848125457764,
"rewards/margins": 4.696637153625488,
"rewards/real": 1.923952341079712,
"step": 2480
},
{
"epoch": 1.59,
"learning_rate": 2.6054502369668247e-07,
"logits/generated": 6.564155578613281,
"logits/real": 4.5207109451293945,
"logps/generated": -285.0248107910156,
"logps/real": -203.79513549804688,
"loss": 0.0668,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -2.5576417446136475,
"rewards/margins": 4.535670280456543,
"rewards/real": 1.978028655052185,
"step": 2490
},
{
"epoch": 1.6,
"learning_rate": 2.5936018957345974e-07,
"logits/generated": 6.515015602111816,
"logits/real": 4.583956241607666,
"logps/generated": -298.4903869628906,
"logps/real": -232.15805053710938,
"loss": 0.0499,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.533477544784546,
"rewards/margins": 4.886750221252441,
"rewards/real": 2.353271961212158,
"step": 2500
},
{
"epoch": 1.61,
"learning_rate": 2.5817535545023696e-07,
"logits/generated": 6.423288822174072,
"logits/real": 5.059484958648682,
"logps/generated": -324.56365966796875,
"logps/real": -248.84371948242188,
"loss": 0.0654,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.873297691345215,
"rewards/margins": 5.0330986976623535,
"rewards/real": 2.1598012447357178,
"step": 2510
},
{
"epoch": 1.61,
"learning_rate": 2.5699052132701423e-07,
"logits/generated": 6.517401218414307,
"logits/real": 5.171728134155273,
"logps/generated": -307.1355285644531,
"logps/real": -257.387451171875,
"loss": 0.0434,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.787299633026123,
"rewards/margins": 4.657351016998291,
"rewards/real": 1.8700507879257202,
"step": 2520
},
{
"epoch": 1.62,
"learning_rate": 2.5580568720379145e-07,
"logits/generated": 6.6013922691345215,
"logits/real": 4.844089984893799,
"logps/generated": -297.20379638671875,
"logps/real": -221.24124145507812,
"loss": 0.0468,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.8276898860931396,
"rewards/margins": 4.962311744689941,
"rewards/real": 2.1346216201782227,
"step": 2530
},
{
"epoch": 1.63,
"learning_rate": 2.5462085308056867e-07,
"logits/generated": 6.696247100830078,
"logits/real": 4.772692680358887,
"logps/generated": -315.21832275390625,
"logps/real": -226.9287567138672,
"loss": 0.0455,
"rewards/accuracies": 1.0,
"rewards/generated": -2.7525386810302734,
"rewards/margins": 4.851990222930908,
"rewards/real": 2.0994513034820557,
"step": 2540
},
{
"epoch": 1.63,
"learning_rate": 2.5343601895734595e-07,
"logits/generated": 6.510929107666016,
"logits/real": 4.709242343902588,
"logps/generated": -306.0609436035156,
"logps/real": -211.63021850585938,
"loss": 0.0599,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.842668056488037,
"rewards/margins": 4.8463568687438965,
"rewards/real": 2.0036888122558594,
"step": 2550
},
{
"epoch": 1.64,
"learning_rate": 2.522511848341232e-07,
"logits/generated": 6.442812442779541,
"logits/real": 4.947168827056885,
"logps/generated": -322.0133056640625,
"logps/real": -240.52841186523438,
"loss": 0.0578,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.7832953929901123,
"rewards/margins": 4.885197639465332,
"rewards/real": 2.101902484893799,
"step": 2560
},
{
"epoch": 1.64,
"learning_rate": 2.510663507109005e-07,
"logits/generated": 6.514994144439697,
"logits/real": 4.442912578582764,
"logps/generated": -313.75628662109375,
"logps/real": -222.9632568359375,
"loss": 0.0421,
"rewards/accuracies": 1.0,
"rewards/generated": -2.991903305053711,
"rewards/margins": 5.337101936340332,
"rewards/real": 2.3451991081237793,
"step": 2570
},
{
"epoch": 1.65,
"learning_rate": 2.498815165876777e-07,
"logits/generated": 6.605074405670166,
"logits/real": 4.601679801940918,
"logps/generated": -310.7535705566406,
"logps/real": -230.5736541748047,
"loss": 0.0501,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.7398934364318848,
"rewards/margins": 5.229077339172363,
"rewards/real": 2.4891839027404785,
"step": 2580
},
{
"epoch": 1.66,
"learning_rate": 2.48696682464455e-07,
"logits/generated": 6.653934478759766,
"logits/real": 5.141819953918457,
"logps/generated": -328.99005126953125,
"logps/real": -241.0696563720703,
"loss": 0.0697,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.0767884254455566,
"rewards/margins": 5.069361686706543,
"rewards/real": 1.9925737380981445,
"step": 2590
},
{
"epoch": 1.66,
"learning_rate": 2.475118483412322e-07,
"logits/generated": 6.622979164123535,
"logits/real": 5.017902374267578,
"logps/generated": -309.37762451171875,
"logps/real": -230.4441375732422,
"loss": 0.0612,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.974555253982544,
"rewards/margins": 5.106658935546875,
"rewards/real": 2.13210391998291,
"step": 2600
},
{
"epoch": 1.67,
"learning_rate": 2.463270142180095e-07,
"logits/generated": 6.475897312164307,
"logits/real": 4.987759590148926,
"logps/generated": -323.8994140625,
"logps/real": -236.59725952148438,
"loss": 0.0486,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.926840305328369,
"rewards/margins": 5.029897212982178,
"rewards/real": 2.1030571460723877,
"step": 2610
},
{
"epoch": 1.68,
"learning_rate": 2.451421800947867e-07,
"logits/generated": 6.513401985168457,
"logits/real": 4.884097576141357,
"logps/generated": -295.1630554199219,
"logps/real": -219.2560272216797,
"loss": 0.0509,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.579526662826538,
"rewards/margins": 4.818698883056641,
"rewards/real": 2.2391719818115234,
"step": 2620
},
{
"epoch": 1.68,
"learning_rate": 2.4395734597156397e-07,
"logits/generated": 6.569561958312988,
"logits/real": 4.7536301612854,
"logps/generated": -314.96917724609375,
"logps/real": -241.3905487060547,
"loss": 0.0581,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.9935827255249023,
"rewards/margins": 5.2356743812561035,
"rewards/real": 2.242091655731201,
"step": 2630
},
{
"epoch": 1.69,
"learning_rate": 2.4277251184834124e-07,
"logits/generated": 6.643240451812744,
"logits/real": 4.672991752624512,
"logps/generated": -298.296630859375,
"logps/real": -218.02627563476562,
"loss": 0.0567,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.751802921295166,
"rewards/margins": 4.864652156829834,
"rewards/real": 2.112849473953247,
"step": 2640
},
{
"epoch": 1.7,
"learning_rate": 2.4158767772511846e-07,
"logits/generated": 6.543112277984619,
"logits/real": 4.794711112976074,
"logps/generated": -300.0271911621094,
"logps/real": -229.9580535888672,
"loss": 0.0449,
"rewards/accuracies": 1.0,
"rewards/generated": -2.9769158363342285,
"rewards/margins": 5.159039497375488,
"rewards/real": 2.182124137878418,
"step": 2650
},
{
"epoch": 1.7,
"learning_rate": 2.4040284360189573e-07,
"logits/generated": 6.426865577697754,
"logits/real": 5.070995807647705,
"logps/generated": -312.7875061035156,
"logps/real": -244.02859497070312,
"loss": 0.0482,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.957658529281616,
"rewards/margins": 4.830506801605225,
"rewards/real": 1.8728487491607666,
"step": 2660
},
{
"epoch": 1.71,
"learning_rate": 2.39218009478673e-07,
"logits/generated": 6.447142601013184,
"logits/real": 4.88741397857666,
"logps/generated": -316.395751953125,
"logps/real": -238.98989868164062,
"loss": 0.0624,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.3095619678497314,
"rewards/margins": 5.356574535369873,
"rewards/real": 2.0470128059387207,
"step": 2670
},
{
"epoch": 1.71,
"learning_rate": 2.3803317535545023e-07,
"logits/generated": 6.639400482177734,
"logits/real": 4.574619293212891,
"logps/generated": -311.22705078125,
"logps/real": -213.94375610351562,
"loss": 0.0609,
"rewards/accuracies": 1.0,
"rewards/generated": -2.9241037368774414,
"rewards/margins": 5.196986675262451,
"rewards/real": 2.2728826999664307,
"step": 2680
},
{
"epoch": 1.72,
"learning_rate": 2.3684834123222747e-07,
"logits/generated": 6.604647159576416,
"logits/real": 4.500279426574707,
"logps/generated": -297.166015625,
"logps/real": -226.86227416992188,
"loss": 0.0609,
"rewards/accuracies": 1.0,
"rewards/generated": -3.1100852489471436,
"rewards/margins": 5.564393520355225,
"rewards/real": 2.45430850982666,
"step": 2690
},
{
"epoch": 1.73,
"learning_rate": 2.3566350710900475e-07,
"logits/generated": 6.377281665802002,
"logits/real": 4.427682399749756,
"logps/generated": -303.52667236328125,
"logps/real": -228.33834838867188,
"loss": 0.0493,
"rewards/accuracies": 1.0,
"rewards/generated": -2.9995968341827393,
"rewards/margins": 5.4720869064331055,
"rewards/real": 2.4724905490875244,
"step": 2700
},
{
"epoch": 1.73,
"learning_rate": 2.3447867298578197e-07,
"logits/generated": 6.596705436706543,
"logits/real": 4.664139747619629,
"logps/generated": -299.83526611328125,
"logps/real": -237.1983184814453,
"loss": 0.0687,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.115382432937622,
"rewards/margins": 5.5024261474609375,
"rewards/real": 2.3870432376861572,
"step": 2710
},
{
"epoch": 1.74,
"learning_rate": 2.3329383886255924e-07,
"logits/generated": 6.471650123596191,
"logits/real": 4.98661470413208,
"logps/generated": -323.77984619140625,
"logps/real": -239.13720703125,
"loss": 0.04,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5205624103546143,
"rewards/margins": 6.0251383781433105,
"rewards/real": 2.5045764446258545,
"step": 2720
},
{
"epoch": 1.75,
"learning_rate": 2.3210900473933649e-07,
"logits/generated": 6.571569919586182,
"logits/real": 5.169942855834961,
"logps/generated": -310.2748107910156,
"logps/real": -243.19113159179688,
"loss": 0.0424,
"rewards/accuracies": 1.0,
"rewards/generated": -3.2248611450195312,
"rewards/margins": 5.1551642417907715,
"rewards/real": 1.9303032159805298,
"step": 2730
},
{
"epoch": 1.75,
"learning_rate": 2.3092417061611373e-07,
"logits/generated": 6.645476341247559,
"logits/real": 4.323509693145752,
"logps/generated": -305.4945373535156,
"logps/real": -211.2071990966797,
"loss": 0.0508,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.021256923675537,
"rewards/margins": 5.193289756774902,
"rewards/real": 2.1720330715179443,
"step": 2740
},
{
"epoch": 1.76,
"learning_rate": 2.2973933649289098e-07,
"logits/generated": 6.557646751403809,
"logits/real": 4.875040531158447,
"logps/generated": -320.00830078125,
"logps/real": -238.9925994873047,
"loss": 0.042,
"rewards/accuracies": 1.0,
"rewards/generated": -3.284480333328247,
"rewards/margins": 5.7841997146606445,
"rewards/real": 2.4997196197509766,
"step": 2750
},
{
"epoch": 1.77,
"learning_rate": 2.2855450236966822e-07,
"logits/generated": 6.572206974029541,
"logits/real": 4.644891262054443,
"logps/generated": -314.3716735839844,
"logps/real": -220.7062225341797,
"loss": 0.0737,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.0081522464752197,
"rewards/margins": 4.949704170227051,
"rewards/real": 1.9415524005889893,
"step": 2760
},
{
"epoch": 1.77,
"learning_rate": 2.273696682464455e-07,
"logits/generated": 6.575781345367432,
"logits/real": 4.972030162811279,
"logps/generated": -304.5403137207031,
"logps/real": -226.6322021484375,
"loss": 0.0561,
"rewards/accuracies": 1.0,
"rewards/generated": -3.0886337757110596,
"rewards/margins": 5.250014781951904,
"rewards/real": 2.1613805294036865,
"step": 2770
},
{
"epoch": 1.78,
"learning_rate": 2.2618483412322272e-07,
"logits/generated": 6.253432273864746,
"logits/real": 4.807991981506348,
"logps/generated": -308.74737548828125,
"logps/real": -265.22088623046875,
"loss": 0.058,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -3.068493366241455,
"rewards/margins": 5.278921127319336,
"rewards/real": 2.2104272842407227,
"step": 2780
},
{
"epoch": 1.79,
"learning_rate": 2.25e-07,
"logits/generated": 6.551450252532959,
"logits/real": 5.10826301574707,
"logps/generated": -307.3768615722656,
"logps/real": -237.50149536132812,
"loss": 0.0522,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.9784510135650635,
"rewards/margins": 4.849761962890625,
"rewards/real": 1.8713109493255615,
"step": 2790
},
{
"epoch": 1.79,
"learning_rate": 2.2381516587677724e-07,
"logits/generated": 6.531739234924316,
"logits/real": 4.827146530151367,
"logps/generated": -314.5278015136719,
"logps/real": -241.0078887939453,
"loss": 0.0595,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.0112459659576416,
"rewards/margins": 4.8616437911987305,
"rewards/real": 1.8503978252410889,
"step": 2800
},
{
"epoch": 1.8,
"learning_rate": 2.226303317535545e-07,
"logits/generated": 6.410187721252441,
"logits/real": 5.148890495300293,
"logps/generated": -312.85565185546875,
"logps/real": -238.3211212158203,
"loss": 0.0568,
"rewards/accuracies": 1.0,
"rewards/generated": -3.0788676738739014,
"rewards/margins": 5.32081413269043,
"rewards/real": 2.2419466972351074,
"step": 2810
},
{
"epoch": 1.8,
"learning_rate": 2.2144549763033173e-07,
"logits/generated": 6.559216499328613,
"logits/real": 4.562496662139893,
"logps/generated": -326.8006591796875,
"logps/real": -229.6642608642578,
"loss": 0.046,
"rewards/accuracies": 1.0,
"rewards/generated": -3.402270793914795,
"rewards/margins": 5.84303092956543,
"rewards/real": 2.4407601356506348,
"step": 2820
},
{
"epoch": 1.81,
"learning_rate": 2.20260663507109e-07,
"logits/generated": 6.503948211669922,
"logits/real": 4.351350784301758,
"logps/generated": -308.74688720703125,
"logps/real": -216.2238006591797,
"loss": 0.0504,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.0520529747009277,
"rewards/margins": 5.5729241371154785,
"rewards/real": 2.5208706855773926,
"step": 2830
},
{
"epoch": 1.82,
"learning_rate": 2.1907582938388625e-07,
"logits/generated": 6.486424922943115,
"logits/real": 4.887758731842041,
"logps/generated": -324.04327392578125,
"logps/real": -245.8160400390625,
"loss": 0.0484,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.8712515830993652,
"rewards/margins": 5.168000221252441,
"rewards/real": 2.296747922897339,
"step": 2840
},
{
"epoch": 1.82,
"learning_rate": 2.178909952606635e-07,
"logits/generated": 6.496403694152832,
"logits/real": 4.948525428771973,
"logps/generated": -313.8655090332031,
"logps/real": -232.6029815673828,
"loss": 0.0384,
"rewards/accuracies": 1.0,
"rewards/generated": -3.164641857147217,
"rewards/margins": 5.175568103790283,
"rewards/real": 2.0109262466430664,
"step": 2850
},
{
"epoch": 1.83,
"learning_rate": 2.1670616113744074e-07,
"logits/generated": 6.357646465301514,
"logits/real": 4.914302825927734,
"logps/generated": -304.9514465332031,
"logps/real": -231.0625,
"loss": 0.0739,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.8249194622039795,
"rewards/margins": 4.825296878814697,
"rewards/real": 2.0003771781921387,
"step": 2860
},
{
"epoch": 1.84,
"learning_rate": 2.15521327014218e-07,
"logits/generated": 6.526192665100098,
"logits/real": 4.569971084594727,
"logps/generated": -310.04571533203125,
"logps/real": -212.1841278076172,
"loss": 0.0442,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.9459242820739746,
"rewards/margins": 5.625715732574463,
"rewards/real": 2.6797919273376465,
"step": 2870
},
{
"epoch": 1.84,
"learning_rate": 2.1433649289099526e-07,
"logits/generated": 6.470966339111328,
"logits/real": 4.952636241912842,
"logps/generated": -298.78216552734375,
"logps/real": -241.9241180419922,
"loss": 0.0488,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.7446885108947754,
"rewards/margins": 4.924825668334961,
"rewards/real": 2.1801369190216064,
"step": 2880
},
{
"epoch": 1.85,
"learning_rate": 2.131516587677725e-07,
"logits/generated": 6.543872833251953,
"logits/real": 5.083493232727051,
"logps/generated": -328.01873779296875,
"logps/real": -251.84909057617188,
"loss": 0.0382,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1929991245269775,
"rewards/margins": 5.793171405792236,
"rewards/real": 2.6001715660095215,
"step": 2890
},
{
"epoch": 1.86,
"learning_rate": 2.1196682464454975e-07,
"logits/generated": 6.523638725280762,
"logits/real": 4.766221046447754,
"logps/generated": -294.3127136230469,
"logps/real": -227.9248809814453,
"loss": 0.0495,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.653705596923828,
"rewards/margins": 4.580477714538574,
"rewards/real": 1.9267723560333252,
"step": 2900
},
{
"epoch": 1.86,
"learning_rate": 2.10781990521327e-07,
"logits/generated": 6.478121757507324,
"logits/real": 5.104989528656006,
"logps/generated": -296.796875,
"logps/real": -241.1748809814453,
"loss": 0.0434,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.8308489322662354,
"rewards/margins": 4.990128517150879,
"rewards/real": 2.1592795848846436,
"step": 2910
},
{
"epoch": 1.87,
"learning_rate": 2.0959715639810427e-07,
"logits/generated": 6.648934841156006,
"logits/real": 4.384207725524902,
"logps/generated": -314.3799133300781,
"logps/real": -215.88339233398438,
"loss": 0.0426,
"rewards/accuracies": 1.0,
"rewards/generated": -3.3432087898254395,
"rewards/margins": 5.64975118637085,
"rewards/real": 2.306542158126831,
"step": 2920
},
{
"epoch": 1.87,
"learning_rate": 2.0841232227488152e-07,
"logits/generated": 6.548549652099609,
"logits/real": 4.6239542961120605,
"logps/generated": -317.40057373046875,
"logps/real": -234.81787109375,
"loss": 0.0453,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6547443866729736,
"rewards/margins": 5.791055202484131,
"rewards/real": 2.1363110542297363,
"step": 2930
},
{
"epoch": 1.88,
"learning_rate": 2.0722748815165874e-07,
"logits/generated": 6.573233604431152,
"logits/real": 4.537630558013916,
"logps/generated": -312.5951232910156,
"logps/real": -227.45846557617188,
"loss": 0.0471,
"rewards/accuracies": 1.0,
"rewards/generated": -3.44865083694458,
"rewards/margins": 5.903794288635254,
"rewards/real": 2.455143690109253,
"step": 2940
},
{
"epoch": 1.89,
"learning_rate": 2.06042654028436e-07,
"logits/generated": 6.5797014236450195,
"logits/real": 4.640379428863525,
"logps/generated": -309.2285461425781,
"logps/real": -226.133056640625,
"loss": 0.0369,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.0750410556793213,
"rewards/margins": 5.1463847160339355,
"rewards/real": 2.0713436603546143,
"step": 2950
},
{
"epoch": 1.89,
"learning_rate": 2.0485781990521326e-07,
"logits/generated": 6.535305023193359,
"logits/real": 4.605700492858887,
"logps/generated": -319.6959228515625,
"logps/real": -225.58975219726562,
"loss": 0.0426,
"rewards/accuracies": 1.0,
"rewards/generated": -3.13779878616333,
"rewards/margins": 5.010854721069336,
"rewards/real": 1.8730554580688477,
"step": 2960
},
{
"epoch": 1.9,
"learning_rate": 2.0367298578199053e-07,
"logits/generated": 6.437843322753906,
"logits/real": 4.84013032913208,
"logps/generated": -301.1745300292969,
"logps/real": -227.8412322998047,
"loss": 0.0418,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.978807210922241,
"rewards/margins": 4.996744632720947,
"rewards/real": 2.017937421798706,
"step": 2970
},
{
"epoch": 1.91,
"learning_rate": 2.0248815165876775e-07,
"logits/generated": 6.470362186431885,
"logits/real": 5.108733654022217,
"logps/generated": -319.79840087890625,
"logps/real": -237.103515625,
"loss": 0.0673,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.0991902351379395,
"rewards/margins": 5.182190418243408,
"rewards/real": 2.0830001831054688,
"step": 2980
},
{
"epoch": 1.91,
"learning_rate": 2.0130331753554502e-07,
"logits/generated": 6.5366621017456055,
"logits/real": 5.0998101234436035,
"logps/generated": -300.20758056640625,
"logps/real": -247.12393188476562,
"loss": 0.0637,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1575188636779785,
"rewards/margins": 5.251214981079102,
"rewards/real": 2.093696117401123,
"step": 2990
},
{
"epoch": 1.92,
"learning_rate": 2.0011848341232227e-07,
"logits/generated": 6.446131706237793,
"logits/real": 4.840705871582031,
"logps/generated": -315.06439208984375,
"logps/real": -226.3990478515625,
"loss": 0.0472,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.494022846221924,
"rewards/margins": 5.752326965332031,
"rewards/real": 2.2583041191101074,
"step": 3000
},
{
"epoch": 1.93,
"learning_rate": 1.9893364928909952e-07,
"logits/generated": 6.619847297668457,
"logits/real": 4.667858600616455,
"logps/generated": -294.6004333496094,
"logps/real": -197.28118896484375,
"loss": 0.0512,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1600637435913086,
"rewards/margins": 5.338671684265137,
"rewards/real": 2.1786084175109863,
"step": 3010
},
{
"epoch": 1.93,
"learning_rate": 1.9774881516587676e-07,
"logits/generated": 6.53371524810791,
"logits/real": 4.700278282165527,
"logps/generated": -309.0678405761719,
"logps/real": -220.1807861328125,
"loss": 0.0639,
"rewards/accuracies": 1.0,
"rewards/generated": -3.0327439308166504,
"rewards/margins": 5.228504180908203,
"rewards/real": 2.1957602500915527,
"step": 3020
},
{
"epoch": 1.94,
"learning_rate": 1.96563981042654e-07,
"logits/generated": 6.723300933837891,
"logits/real": 4.539021968841553,
"logps/generated": -309.5898132324219,
"logps/real": -205.1199493408203,
"loss": 0.0483,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1201491355895996,
"rewards/margins": 5.280410289764404,
"rewards/real": 2.1602609157562256,
"step": 3030
},
{
"epoch": 1.94,
"learning_rate": 1.9537914691943128e-07,
"logits/generated": 6.615107536315918,
"logits/real": 4.7945075035095215,
"logps/generated": -340.0155944824219,
"logps/real": -234.44259643554688,
"loss": 0.0439,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.4857048988342285,
"rewards/margins": 5.184080600738525,
"rewards/real": 1.6983757019042969,
"step": 3040
},
{
"epoch": 1.95,
"learning_rate": 1.9419431279620853e-07,
"logits/generated": 6.457086086273193,
"logits/real": 4.920231819152832,
"logps/generated": -301.90081787109375,
"logps/real": -209.1177978515625,
"loss": 0.0413,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -2.7858481407165527,
"rewards/margins": 4.725141525268555,
"rewards/real": 1.9392932653427124,
"step": 3050
},
{
"epoch": 1.96,
"learning_rate": 1.9300947867298577e-07,
"logits/generated": 6.525505065917969,
"logits/real": 4.8382768630981445,
"logps/generated": -294.0171813964844,
"logps/real": -215.0476531982422,
"loss": 0.0385,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3542685508728027,
"rewards/margins": 5.338757514953613,
"rewards/real": 1.9844884872436523,
"step": 3060
},
{
"epoch": 1.96,
"learning_rate": 1.9182464454976302e-07,
"logits/generated": 6.592054843902588,
"logits/real": 4.595870494842529,
"logps/generated": -303.69305419921875,
"logps/real": -216.79074096679688,
"loss": 0.044,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -3.3395583629608154,
"rewards/margins": 5.180264949798584,
"rewards/real": 1.8407065868377686,
"step": 3070
},
{
"epoch": 1.97,
"learning_rate": 1.906398104265403e-07,
"logits/generated": 6.421343803405762,
"logits/real": 4.30601167678833,
"logps/generated": -316.9730529785156,
"logps/real": -221.79721069335938,
"loss": 0.046,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5621120929718018,
"rewards/margins": 5.808313846588135,
"rewards/real": 2.246202230453491,
"step": 3080
},
{
"epoch": 1.98,
"learning_rate": 1.8945497630331754e-07,
"logits/generated": 6.610709190368652,
"logits/real": 4.592480182647705,
"logps/generated": -305.1477355957031,
"logps/real": -200.81964111328125,
"loss": 0.0469,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.413442611694336,
"rewards/margins": 5.351014614105225,
"rewards/real": 1.9375722408294678,
"step": 3090
},
{
"epoch": 1.98,
"learning_rate": 1.8827014218009476e-07,
"logits/generated": 6.592061519622803,
"logits/real": 4.954311370849609,
"logps/generated": -323.15411376953125,
"logps/real": -261.54278564453125,
"loss": 0.0489,
"rewards/accuracies": 1.0,
"rewards/generated": -3.3372626304626465,
"rewards/margins": 5.153835773468018,
"rewards/real": 1.816572904586792,
"step": 3100
},
{
"epoch": 1.99,
"learning_rate": 1.8708530805687203e-07,
"logits/generated": 6.648950099945068,
"logits/real": 4.868198871612549,
"logps/generated": -311.92987060546875,
"logps/real": -221.17819213867188,
"loss": 0.05,
"rewards/accuracies": 1.0,
"rewards/generated": -3.4177818298339844,
"rewards/margins": 5.747452259063721,
"rewards/real": 2.3296706676483154,
"step": 3110
},
{
"epoch": 2.0,
"learning_rate": 1.8590047393364928e-07,
"logits/generated": 6.51468563079834,
"logits/real": 4.445683479309082,
"logps/generated": -308.3443908691406,
"logps/real": -217.1921844482422,
"loss": 0.0456,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.044935941696167,
"rewards/margins": 5.253106594085693,
"rewards/real": 2.2081711292266846,
"step": 3120
},
{
"epoch": 2.0,
"learning_rate": 1.8471563981042655e-07,
"logits/generated": 6.549835205078125,
"logits/real": 4.801936149597168,
"logps/generated": -295.6900939941406,
"logps/real": -213.8479766845703,
"loss": 0.0543,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1135101318359375,
"rewards/margins": 5.0333404541015625,
"rewards/real": 1.9198299646377563,
"step": 3130
},
{
"epoch": 2.01,
"learning_rate": 1.8353080568720377e-07,
"logits/generated": 6.434506416320801,
"logits/real": 4.751941204071045,
"logps/generated": -318.86431884765625,
"logps/real": -244.60107421875,
"loss": 0.0339,
"rewards/accuracies": 1.0,
"rewards/generated": -3.4822006225585938,
"rewards/margins": 5.774388313293457,
"rewards/real": 2.2921876907348633,
"step": 3140
},
{
"epoch": 2.02,
"learning_rate": 1.8234597156398104e-07,
"logits/generated": 6.266590118408203,
"logits/real": 4.948928356170654,
"logps/generated": -310.56341552734375,
"logps/real": -226.72293090820312,
"loss": 0.0339,
"rewards/accuracies": 1.0,
"rewards/generated": -3.347864866256714,
"rewards/margins": 5.35115909576416,
"rewards/real": 2.003293991088867,
"step": 3150
},
{
"epoch": 2.02,
"learning_rate": 1.811611374407583e-07,
"logits/generated": 6.356667518615723,
"logits/real": 4.884829521179199,
"logps/generated": -313.6650390625,
"logps/real": -234.5040283203125,
"loss": 0.0393,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3364055156707764,
"rewards/margins": 5.497099876403809,
"rewards/real": 2.160693645477295,
"step": 3160
},
{
"epoch": 2.03,
"learning_rate": 1.7997630331753554e-07,
"logits/generated": 6.548737525939941,
"logits/real": 4.825071334838867,
"logps/generated": -291.5633850097656,
"logps/real": -233.65988159179688,
"loss": 0.0505,
"rewards/accuracies": 1.0,
"rewards/generated": -3.0673575401306152,
"rewards/margins": 5.524203300476074,
"rewards/real": 2.456845760345459,
"step": 3170
},
{
"epoch": 2.03,
"learning_rate": 1.7879146919431278e-07,
"logits/generated": 6.497158050537109,
"logits/real": 4.860651969909668,
"logps/generated": -316.8197937011719,
"logps/real": -223.147705078125,
"loss": 0.0325,
"rewards/accuracies": 1.0,
"rewards/generated": -3.715630292892456,
"rewards/margins": 5.779938697814941,
"rewards/real": 2.064307689666748,
"step": 3180
},
{
"epoch": 2.04,
"learning_rate": 1.7760663507109003e-07,
"logits/generated": 6.6343674659729,
"logits/real": 4.742542266845703,
"logps/generated": -314.57635498046875,
"logps/real": -218.7213897705078,
"loss": 0.0589,
"rewards/accuracies": 1.0,
"rewards/generated": -3.649468183517456,
"rewards/margins": 5.897641181945801,
"rewards/real": 2.248173713684082,
"step": 3190
},
{
"epoch": 2.05,
"learning_rate": 1.764218009478673e-07,
"logits/generated": 6.572301387786865,
"logits/real": 4.605647087097168,
"logps/generated": -312.57159423828125,
"logps/real": -218.27255249023438,
"loss": 0.0499,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.4011642932891846,
"rewards/margins": 5.888424396514893,
"rewards/real": 2.487260341644287,
"step": 3200
},
{
"epoch": 2.05,
"learning_rate": 1.7523696682464452e-07,
"logits/generated": 6.526484489440918,
"logits/real": 4.609063625335693,
"logps/generated": -319.1774597167969,
"logps/real": -225.0438232421875,
"loss": 0.0432,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5665206909179688,
"rewards/margins": 5.571340084075928,
"rewards/real": 2.004819393157959,
"step": 3210
},
{
"epoch": 2.06,
"learning_rate": 1.740521327014218e-07,
"logits/generated": 6.531280517578125,
"logits/real": 4.885863780975342,
"logps/generated": -315.1875915527344,
"logps/real": -237.132568359375,
"loss": 0.0469,
"rewards/accuracies": 1.0,
"rewards/generated": -3.1818227767944336,
"rewards/margins": 5.084862232208252,
"rewards/real": 1.9030392169952393,
"step": 3220
},
{
"epoch": 2.07,
"learning_rate": 1.7286729857819904e-07,
"logits/generated": 6.615921974182129,
"logits/real": 4.542719841003418,
"logps/generated": -314.04119873046875,
"logps/real": -225.2353515625,
"loss": 0.0352,
"rewards/accuracies": 1.0,
"rewards/generated": -3.191343069076538,
"rewards/margins": 5.438819408416748,
"rewards/real": 2.247476816177368,
"step": 3230
},
{
"epoch": 2.07,
"learning_rate": 1.7168246445497631e-07,
"logits/generated": 6.290173530578613,
"logits/real": 4.586104869842529,
"logps/generated": -313.9992370605469,
"logps/real": -213.3672332763672,
"loss": 0.0379,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7760837078094482,
"rewards/margins": 6.122511863708496,
"rewards/real": 2.3464274406433105,
"step": 3240
},
{
"epoch": 2.08,
"learning_rate": 1.7049763033175353e-07,
"logits/generated": 6.58236837387085,
"logits/real": 4.376986503601074,
"logps/generated": -296.7496643066406,
"logps/real": -215.1361083984375,
"loss": 0.0364,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -3.280973434448242,
"rewards/margins": 5.545414924621582,
"rewards/real": 2.26444149017334,
"step": 3250
},
{
"epoch": 2.09,
"learning_rate": 1.693127962085308e-07,
"logits/generated": 6.613530158996582,
"logits/real": 4.965182304382324,
"logps/generated": -320.67425537109375,
"logps/real": -241.35256958007812,
"loss": 0.0375,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3830809593200684,
"rewards/margins": 5.788832664489746,
"rewards/real": 2.4057505130767822,
"step": 3260
},
{
"epoch": 2.09,
"learning_rate": 1.6812796208530805e-07,
"logits/generated": 6.390702724456787,
"logits/real": 4.728426933288574,
"logps/generated": -312.08929443359375,
"logps/real": -227.0480499267578,
"loss": 0.038,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.2798690795898438,
"rewards/margins": 5.261677265167236,
"rewards/real": 1.9818084239959717,
"step": 3270
},
{
"epoch": 2.1,
"learning_rate": 1.669431279620853e-07,
"logits/generated": 6.487574100494385,
"logits/real": 4.512204647064209,
"logps/generated": -321.84368896484375,
"logps/real": -240.1066131591797,
"loss": 0.0496,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5348918437957764,
"rewards/margins": 5.897745132446289,
"rewards/real": 2.3628532886505127,
"step": 3280
},
{
"epoch": 2.1,
"learning_rate": 1.6575829383886255e-07,
"logits/generated": 6.475184440612793,
"logits/real": 4.735175132751465,
"logps/generated": -296.4978942871094,
"logps/real": -225.4010467529297,
"loss": 0.053,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1431491374969482,
"rewards/margins": 5.334324359893799,
"rewards/real": 2.1911754608154297,
"step": 3290
},
{
"epoch": 2.11,
"learning_rate": 1.645734597156398e-07,
"logits/generated": 6.46063756942749,
"logits/real": 4.446132183074951,
"logps/generated": -306.07568359375,
"logps/real": -224.0647735595703,
"loss": 0.0525,
"rewards/accuracies": 1.0,
"rewards/generated": -3.518324613571167,
"rewards/margins": 5.946985721588135,
"rewards/real": 2.4286608695983887,
"step": 3300
},
{
"epoch": 2.12,
"learning_rate": 1.6338862559241706e-07,
"logits/generated": 6.575872898101807,
"logits/real": 4.777173042297363,
"logps/generated": -312.64984130859375,
"logps/real": -225.09823608398438,
"loss": 0.0327,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.520355701446533,
"rewards/margins": 5.975916862487793,
"rewards/real": 2.4555611610412598,
"step": 3310
},
{
"epoch": 2.12,
"learning_rate": 1.622037914691943e-07,
"logits/generated": 6.580047607421875,
"logits/real": 4.5617146492004395,
"logps/generated": -303.8891296386719,
"logps/real": -218.8231658935547,
"loss": 0.0451,
"rewards/accuracies": 1.0,
"rewards/generated": -3.342977523803711,
"rewards/margins": 5.351634502410889,
"rewards/real": 2.0086567401885986,
"step": 3320
},
{
"epoch": 2.13,
"learning_rate": 1.6101895734597156e-07,
"logits/generated": 6.562408447265625,
"logits/real": 5.205977916717529,
"logps/generated": -321.8945617675781,
"logps/real": -269.1778564453125,
"loss": 0.0441,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.6907927989959717,
"rewards/margins": 5.766333103179932,
"rewards/real": 2.07554030418396,
"step": 3330
},
{
"epoch": 2.14,
"learning_rate": 1.598341232227488e-07,
"logits/generated": 6.638333320617676,
"logits/real": 4.657790184020996,
"logps/generated": -319.16754150390625,
"logps/real": -232.8119354248047,
"loss": 0.0498,
"rewards/accuracies": 1.0,
"rewards/generated": -3.34710693359375,
"rewards/margins": 5.686694145202637,
"rewards/real": 2.3395867347717285,
"step": 3340
},
{
"epoch": 2.14,
"learning_rate": 1.5864928909952605e-07,
"logits/generated": 6.637696743011475,
"logits/real": 4.663809776306152,
"logps/generated": -317.9059143066406,
"logps/real": -223.9643096923828,
"loss": 0.0544,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5467820167541504,
"rewards/margins": 5.613127708435059,
"rewards/real": 2.066345691680908,
"step": 3350
},
{
"epoch": 2.15,
"learning_rate": 1.5746445497630332e-07,
"logits/generated": 6.5797553062438965,
"logits/real": 5.032973289489746,
"logps/generated": -309.12310791015625,
"logps/real": -231.84146118164062,
"loss": 0.0434,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.374034881591797,
"rewards/margins": 5.373286247253418,
"rewards/real": 1.9992516040802002,
"step": 3360
},
{
"epoch": 2.16,
"learning_rate": 1.5627962085308054e-07,
"logits/generated": 6.6527228355407715,
"logits/real": 4.813700199127197,
"logps/generated": -314.54132080078125,
"logps/real": -244.5780487060547,
"loss": 0.0367,
"rewards/accuracies": 1.0,
"rewards/generated": -3.4785728454589844,
"rewards/margins": 5.757080078125,
"rewards/real": 2.2785069942474365,
"step": 3370
},
{
"epoch": 2.16,
"learning_rate": 1.5509478672985782e-07,
"logits/generated": 6.394598007202148,
"logits/real": 4.875939846038818,
"logps/generated": -316.898681640625,
"logps/real": -229.66049194335938,
"loss": 0.0356,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3502418994903564,
"rewards/margins": 5.546257972717285,
"rewards/real": 2.196016311645508,
"step": 3380
},
{
"epoch": 2.17,
"learning_rate": 1.5390995260663506e-07,
"logits/generated": 6.59166955947876,
"logits/real": 4.965734004974365,
"logps/generated": -299.8507385253906,
"logps/real": -240.0013885498047,
"loss": 0.0397,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.1730546951293945,
"rewards/margins": 5.254295349121094,
"rewards/real": 2.081240177154541,
"step": 3390
},
{
"epoch": 2.18,
"learning_rate": 1.5272511848341233e-07,
"logits/generated": 6.579958915710449,
"logits/real": 4.6075544357299805,
"logps/generated": -311.3309631347656,
"logps/real": -231.1168975830078,
"loss": 0.0377,
"rewards/accuracies": 1.0,
"rewards/generated": -3.3581135272979736,
"rewards/margins": 5.885567665100098,
"rewards/real": 2.5274531841278076,
"step": 3400
},
{
"epoch": 2.18,
"learning_rate": 1.5154028436018955e-07,
"logits/generated": 6.564838409423828,
"logits/real": 4.323554515838623,
"logps/generated": -314.01080322265625,
"logps/real": -216.9060821533203,
"loss": 0.054,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6513094902038574,
"rewards/margins": 6.1155195236206055,
"rewards/real": 2.464210033416748,
"step": 3410
},
{
"epoch": 2.19,
"learning_rate": 1.5035545023696683e-07,
"logits/generated": 6.470817565917969,
"logits/real": 4.969902038574219,
"logps/generated": -315.4042663574219,
"logps/real": -210.75894165039062,
"loss": 0.0383,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6396255493164062,
"rewards/margins": 5.780555248260498,
"rewards/real": 2.140929698944092,
"step": 3420
},
{
"epoch": 2.19,
"learning_rate": 1.4917061611374407e-07,
"logits/generated": 6.28502082824707,
"logits/real": 4.579121112823486,
"logps/generated": -298.55078125,
"logps/real": -227.2353057861328,
"loss": 0.0396,
"rewards/accuracies": 1.0,
"rewards/generated": -3.269718647003174,
"rewards/margins": 5.49901819229126,
"rewards/real": 2.2292990684509277,
"step": 3430
},
{
"epoch": 2.2,
"learning_rate": 1.4798578199052132e-07,
"logits/generated": 6.529310703277588,
"logits/real": 5.1802496910095215,
"logps/generated": -314.0540466308594,
"logps/real": -246.96493530273438,
"loss": 0.0459,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.632396697998047,
"rewards/margins": 5.8060173988342285,
"rewards/real": 2.173621892929077,
"step": 3440
},
{
"epoch": 2.21,
"learning_rate": 1.4680094786729857e-07,
"logits/generated": 6.60665225982666,
"logits/real": 4.755316257476807,
"logps/generated": -315.1619873046875,
"logps/real": -212.05435180664062,
"loss": 0.0569,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5212502479553223,
"rewards/margins": 5.648011207580566,
"rewards/real": 2.126760959625244,
"step": 3450
},
{
"epoch": 2.21,
"learning_rate": 1.456161137440758e-07,
"logits/generated": 6.501960754394531,
"logits/real": 4.503037929534912,
"logps/generated": -330.319580078125,
"logps/real": -211.60977172851562,
"loss": 0.0477,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.714958906173706,
"rewards/margins": 5.880065441131592,
"rewards/real": 2.1651062965393066,
"step": 3460
},
{
"epoch": 2.22,
"learning_rate": 1.4443127962085309e-07,
"logits/generated": 6.524314880371094,
"logits/real": 4.682694911956787,
"logps/generated": -318.777099609375,
"logps/real": -249.3422088623047,
"loss": 0.0234,
"rewards/accuracies": 1.0,
"rewards/generated": -3.846958875656128,
"rewards/margins": 5.798233985900879,
"rewards/real": 1.9512755870819092,
"step": 3470
},
{
"epoch": 2.23,
"learning_rate": 1.4324644549763033e-07,
"logits/generated": 6.539910793304443,
"logits/real": 4.809814929962158,
"logps/generated": -323.9732360839844,
"logps/real": -247.67227172851562,
"loss": 0.0404,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5829575061798096,
"rewards/margins": 5.772634506225586,
"rewards/real": 2.1896770000457764,
"step": 3480
},
{
"epoch": 2.23,
"learning_rate": 1.4206161137440758e-07,
"logits/generated": 6.5540771484375,
"logits/real": 4.512951850891113,
"logps/generated": -315.17010498046875,
"logps/real": -210.12588500976562,
"loss": 0.0278,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6037814617156982,
"rewards/margins": 5.973625183105469,
"rewards/real": 2.3698432445526123,
"step": 3490
},
{
"epoch": 2.24,
"learning_rate": 1.4087677725118482e-07,
"logits/generated": 6.513745307922363,
"logits/real": 4.433924198150635,
"logps/generated": -310.23089599609375,
"logps/real": -203.8213348388672,
"loss": 0.036,
"rewards/accuracies": 1.0,
"rewards/generated": -3.4215285778045654,
"rewards/margins": 5.807127952575684,
"rewards/real": 2.385599374771118,
"step": 3500
},
{
"epoch": 2.25,
"learning_rate": 1.396919431279621e-07,
"logits/generated": 6.5156755447387695,
"logits/real": 4.558250427246094,
"logps/generated": -299.29339599609375,
"logps/real": -218.6123046875,
"loss": 0.035,
"rewards/accuracies": 1.0,
"rewards/generated": -3.3458030223846436,
"rewards/margins": 5.673041820526123,
"rewards/real": 2.3272387981414795,
"step": 3510
},
{
"epoch": 2.25,
"learning_rate": 1.3850710900473934e-07,
"logits/generated": 6.494305610656738,
"logits/real": 5.065830707550049,
"logps/generated": -316.78814697265625,
"logps/real": -231.40750122070312,
"loss": 0.0381,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5985469818115234,
"rewards/margins": 5.727441310882568,
"rewards/real": 2.128894805908203,
"step": 3520
},
{
"epoch": 2.26,
"learning_rate": 1.3732227488151656e-07,
"logits/generated": 6.41510009765625,
"logits/real": 4.9217329025268555,
"logps/generated": -318.82440185546875,
"logps/real": -246.50369262695312,
"loss": 0.0435,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5909526348114014,
"rewards/margins": 6.108970642089844,
"rewards/real": 2.5180177688598633,
"step": 3530
},
{
"epoch": 2.26,
"learning_rate": 1.3613744075829384e-07,
"logits/generated": 6.501974582672119,
"logits/real": 4.702073097229004,
"logps/generated": -306.13214111328125,
"logps/real": -238.8610382080078,
"loss": 0.0517,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.403776168823242,
"rewards/margins": 5.4921417236328125,
"rewards/real": 2.0883657932281494,
"step": 3540
},
{
"epoch": 2.27,
"learning_rate": 1.3495260663507108e-07,
"logits/generated": 6.479678153991699,
"logits/real": 5.00525426864624,
"logps/generated": -333.7792663574219,
"logps/real": -235.4060516357422,
"loss": 0.0294,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.638486385345459,
"rewards/margins": 5.711910247802734,
"rewards/real": 2.0734241008758545,
"step": 3550
},
{
"epoch": 2.28,
"learning_rate": 1.3376777251184836e-07,
"logits/generated": 6.646874904632568,
"logits/real": 4.480135917663574,
"logps/generated": -319.85870361328125,
"logps/real": -205.182861328125,
"loss": 0.0435,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5380020141601562,
"rewards/margins": 5.962095737457275,
"rewards/real": 2.424093723297119,
"step": 3560
},
{
"epoch": 2.28,
"learning_rate": 1.3258293838862558e-07,
"logits/generated": 6.473311424255371,
"logits/real": 4.572601795196533,
"logps/generated": -313.9909973144531,
"logps/real": -238.57861328125,
"loss": 0.0355,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.488398790359497,
"rewards/margins": 5.876242637634277,
"rewards/real": 2.3878438472747803,
"step": 3570
},
{
"epoch": 2.29,
"learning_rate": 1.3139810426540285e-07,
"logits/generated": 6.481254577636719,
"logits/real": 4.894389629364014,
"logps/generated": -322.4656677246094,
"logps/real": -231.29037475585938,
"loss": 0.0429,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.569429874420166,
"rewards/margins": 5.297967910766602,
"rewards/real": 1.7285382747650146,
"step": 3580
},
{
"epoch": 2.3,
"learning_rate": 1.302132701421801e-07,
"logits/generated": 6.44034481048584,
"logits/real": 4.870292663574219,
"logps/generated": -309.8015441894531,
"logps/real": -232.23141479492188,
"loss": 0.0466,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.57190203666687,
"rewards/margins": 5.714931488037109,
"rewards/real": 2.1430296897888184,
"step": 3590
},
{
"epoch": 2.3,
"learning_rate": 1.2902843601895734e-07,
"logits/generated": 6.628100395202637,
"logits/real": 4.56320333480835,
"logps/generated": -314.26617431640625,
"logps/real": -216.93435668945312,
"loss": 0.0364,
"rewards/accuracies": 1.0,
"rewards/generated": -3.669429302215576,
"rewards/margins": 5.688117980957031,
"rewards/real": 2.018688201904297,
"step": 3600
},
{
"epoch": 2.31,
"learning_rate": 1.278436018957346e-07,
"logits/generated": 6.5281829833984375,
"logits/real": 4.858192443847656,
"logps/generated": -314.5399169921875,
"logps/real": -231.82437133789062,
"loss": 0.0529,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.659435987472534,
"rewards/margins": 6.2276201248168945,
"rewards/real": 2.5681843757629395,
"step": 3610
},
{
"epoch": 2.32,
"learning_rate": 1.2665876777251183e-07,
"logits/generated": 6.622697353363037,
"logits/real": 4.539548397064209,
"logps/generated": -316.33612060546875,
"logps/real": -218.0032196044922,
"loss": 0.0514,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.372077465057373,
"rewards/margins": 5.659914493560791,
"rewards/real": 2.287837505340576,
"step": 3620
},
{
"epoch": 2.32,
"learning_rate": 1.254739336492891e-07,
"logits/generated": 6.434654235839844,
"logits/real": 5.143196105957031,
"logps/generated": -318.66705322265625,
"logps/real": -229.20541381835938,
"loss": 0.0388,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7129311561584473,
"rewards/margins": 5.720685958862305,
"rewards/real": 2.007754325866699,
"step": 3630
},
{
"epoch": 2.33,
"learning_rate": 1.2428909952606635e-07,
"logits/generated": 6.507617950439453,
"logits/real": 4.2293171882629395,
"logps/generated": -331.07684326171875,
"logps/real": -223.13516235351562,
"loss": 0.0279,
"rewards/accuracies": 1.0,
"rewards/generated": -4.012661933898926,
"rewards/margins": 6.401325225830078,
"rewards/real": 2.3886632919311523,
"step": 3640
},
{
"epoch": 2.34,
"learning_rate": 1.231042654028436e-07,
"logits/generated": 6.525460243225098,
"logits/real": 4.729413986206055,
"logps/generated": -307.34771728515625,
"logps/real": -223.11129760742188,
"loss": 0.0319,
"rewards/accuracies": 1.0,
"rewards/generated": -3.219517230987549,
"rewards/margins": 5.384081840515137,
"rewards/real": 2.164564609527588,
"step": 3650
},
{
"epoch": 2.34,
"learning_rate": 1.2191943127962085e-07,
"logits/generated": 6.551595211029053,
"logits/real": 4.6989874839782715,
"logps/generated": -298.13812255859375,
"logps/real": -219.3531036376953,
"loss": 0.0519,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5793490409851074,
"rewards/margins": 5.872319221496582,
"rewards/real": 2.292970657348633,
"step": 3660
},
{
"epoch": 2.35,
"learning_rate": 1.207345971563981e-07,
"logits/generated": 6.519845485687256,
"logits/real": 4.6157450675964355,
"logps/generated": -313.74615478515625,
"logps/real": -225.33602905273438,
"loss": 0.044,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.348008394241333,
"rewards/margins": 5.656905651092529,
"rewards/real": 2.3088979721069336,
"step": 3670
},
{
"epoch": 2.35,
"learning_rate": 1.1954976303317534e-07,
"logits/generated": 6.551909446716309,
"logits/real": 4.742045879364014,
"logps/generated": -308.15435791015625,
"logps/real": -239.033935546875,
"loss": 0.0385,
"rewards/accuracies": 1.0,
"rewards/generated": -3.4503626823425293,
"rewards/margins": 5.495227813720703,
"rewards/real": 2.0448646545410156,
"step": 3680
},
{
"epoch": 2.36,
"learning_rate": 1.183649289099526e-07,
"logits/generated": 6.58068323135376,
"logits/real": 4.629731178283691,
"logps/generated": -316.3744812011719,
"logps/real": -217.2517547607422,
"loss": 0.0322,
"rewards/accuracies": 1.0,
"rewards/generated": -3.685664415359497,
"rewards/margins": 6.379915714263916,
"rewards/real": 2.6942508220672607,
"step": 3690
},
{
"epoch": 2.37,
"learning_rate": 1.1718009478672986e-07,
"logits/generated": 6.6019697189331055,
"logits/real": 4.920955657958984,
"logps/generated": -315.7640380859375,
"logps/real": -215.07107543945312,
"loss": 0.0342,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7241485118865967,
"rewards/margins": 5.824596405029297,
"rewards/real": 2.1004483699798584,
"step": 3700
},
{
"epoch": 2.37,
"learning_rate": 1.159952606635071e-07,
"logits/generated": 6.5531206130981445,
"logits/real": 4.487125396728516,
"logps/generated": -308.80859375,
"logps/real": -211.96389770507812,
"loss": 0.0252,
"rewards/accuracies": 1.0,
"rewards/generated": -3.3599178791046143,
"rewards/margins": 6.051535606384277,
"rewards/real": 2.6916182041168213,
"step": 3710
},
{
"epoch": 2.38,
"learning_rate": 1.1481042654028436e-07,
"logits/generated": 6.397873878479004,
"logits/real": 4.594054698944092,
"logps/generated": -307.98046875,
"logps/real": -209.40310668945312,
"loss": 0.0471,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.6652374267578125,
"rewards/margins": 6.013858795166016,
"rewards/real": 2.348621129989624,
"step": 3720
},
{
"epoch": 2.39,
"learning_rate": 1.136255924170616e-07,
"logits/generated": 6.375195026397705,
"logits/real": 4.987454414367676,
"logps/generated": -317.755859375,
"logps/real": -241.71255493164062,
"loss": 0.0497,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.5185465812683105,
"rewards/margins": 5.797640800476074,
"rewards/real": 2.2790942192077637,
"step": 3730
},
{
"epoch": 2.39,
"learning_rate": 1.1244075829383886e-07,
"logits/generated": 6.541880130767822,
"logits/real": 4.569333553314209,
"logps/generated": -318.06109619140625,
"logps/real": -230.28421020507812,
"loss": 0.0618,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.4923720359802246,
"rewards/margins": 5.879878044128418,
"rewards/real": 2.3875060081481934,
"step": 3740
},
{
"epoch": 2.4,
"learning_rate": 1.112559241706161e-07,
"logits/generated": 6.615804195404053,
"logits/real": 4.598508358001709,
"logps/generated": -315.1165771484375,
"logps/real": -230.8600616455078,
"loss": 0.0409,
"rewards/accuracies": 1.0,
"rewards/generated": -3.722174882888794,
"rewards/margins": 6.234826564788818,
"rewards/real": 2.512650966644287,
"step": 3750
},
{
"epoch": 2.41,
"learning_rate": 1.1007109004739336e-07,
"logits/generated": 6.621249198913574,
"logits/real": 4.556908130645752,
"logps/generated": -320.9785461425781,
"logps/real": -225.3008575439453,
"loss": 0.0337,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7469630241394043,
"rewards/margins": 6.285904884338379,
"rewards/real": 2.5389418601989746,
"step": 3760
},
{
"epoch": 2.41,
"learning_rate": 1.0888625592417061e-07,
"logits/generated": 6.390491962432861,
"logits/real": 4.773979187011719,
"logps/generated": -312.04071044921875,
"logps/real": -249.9011688232422,
"loss": 0.041,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3887813091278076,
"rewards/margins": 5.657594203948975,
"rewards/real": 2.268812894821167,
"step": 3770
},
{
"epoch": 2.42,
"learning_rate": 1.0770142180094787e-07,
"logits/generated": 6.476538181304932,
"logits/real": 4.669577598571777,
"logps/generated": -312.90032958984375,
"logps/real": -213.92166137695312,
"loss": 0.0449,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.6199536323547363,
"rewards/margins": 5.9859161376953125,
"rewards/real": 2.365962505340576,
"step": 3780
},
{
"epoch": 2.42,
"learning_rate": 1.0651658767772511e-07,
"logits/generated": 6.476927757263184,
"logits/real": 4.563714504241943,
"logps/generated": -315.058837890625,
"logps/real": -224.32577514648438,
"loss": 0.049,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7334506511688232,
"rewards/margins": 6.3079400062561035,
"rewards/real": 2.5744881629943848,
"step": 3790
},
{
"epoch": 2.43,
"learning_rate": 1.0533175355450237e-07,
"logits/generated": 6.539498329162598,
"logits/real": 4.8311848640441895,
"logps/generated": -318.9219970703125,
"logps/real": -240.50894165039062,
"loss": 0.0457,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6386666297912598,
"rewards/margins": 6.052326679229736,
"rewards/real": 2.4136602878570557,
"step": 3800
},
{
"epoch": 2.44,
"learning_rate": 1.0414691943127962e-07,
"logits/generated": 6.435731410980225,
"logits/real": 4.929614067077637,
"logps/generated": -316.7015686035156,
"logps/real": -229.21322631835938,
"loss": 0.043,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.8294575214385986,
"rewards/margins": 6.2790374755859375,
"rewards/real": 2.4495797157287598,
"step": 3810
},
{
"epoch": 2.44,
"learning_rate": 1.0296208530805687e-07,
"logits/generated": 6.627385139465332,
"logits/real": 4.485804557800293,
"logps/generated": -309.6326599121094,
"logps/real": -211.5670166015625,
"loss": 0.045,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -3.6681857109069824,
"rewards/margins": 6.135626316070557,
"rewards/real": 2.467440128326416,
"step": 3820
},
{
"epoch": 2.45,
"learning_rate": 1.0177725118483411e-07,
"logits/generated": 6.6564741134643555,
"logits/real": 4.68516206741333,
"logps/generated": -318.8467712402344,
"logps/real": -226.6106414794922,
"loss": 0.051,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5700950622558594,
"rewards/margins": 5.723981857299805,
"rewards/real": 2.153886556625366,
"step": 3830
},
{
"epoch": 2.46,
"learning_rate": 1.0059241706161137e-07,
"logits/generated": 6.6741204261779785,
"logits/real": 4.536016941070557,
"logps/generated": -324.20159912109375,
"logps/real": -207.59896850585938,
"loss": 0.0416,
"rewards/accuracies": 1.0,
"rewards/generated": -3.788970470428467,
"rewards/margins": 6.435559272766113,
"rewards/real": 2.6465885639190674,
"step": 3840
},
{
"epoch": 2.46,
"learning_rate": 9.940758293838862e-08,
"logits/generated": 6.565242767333984,
"logits/real": 4.836869239807129,
"logps/generated": -303.9171447753906,
"logps/real": -200.09014892578125,
"loss": 0.0259,
"rewards/accuracies": 1.0,
"rewards/generated": -3.631394147872925,
"rewards/margins": 5.958956718444824,
"rewards/real": 2.327561855316162,
"step": 3850
},
{
"epoch": 2.47,
"learning_rate": 9.822274881516588e-08,
"logits/generated": 6.608918190002441,
"logits/real": 4.765702724456787,
"logps/generated": -311.97247314453125,
"logps/real": -218.27175903320312,
"loss": 0.0397,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.8027045726776123,
"rewards/margins": 6.115901470184326,
"rewards/real": 2.3131978511810303,
"step": 3860
},
{
"epoch": 2.48,
"learning_rate": 9.703791469194312e-08,
"logits/generated": 6.5209455490112305,
"logits/real": 4.784424781799316,
"logps/generated": -316.18646240234375,
"logps/real": -219.3183135986328,
"loss": 0.0465,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.898909330368042,
"rewards/margins": 5.930804252624512,
"rewards/real": 2.0318946838378906,
"step": 3870
},
{
"epoch": 2.48,
"learning_rate": 9.585308056872038e-08,
"logits/generated": 6.519847869873047,
"logits/real": 4.682705879211426,
"logps/generated": -316.4295349121094,
"logps/real": -221.3084259033203,
"loss": 0.022,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6345527172088623,
"rewards/margins": 5.830044746398926,
"rewards/real": 2.1954920291900635,
"step": 3880
},
{
"epoch": 2.49,
"learning_rate": 9.466824644549763e-08,
"logits/generated": 6.437973976135254,
"logits/real": 4.786694526672363,
"logps/generated": -308.895263671875,
"logps/real": -247.1173095703125,
"loss": 0.0299,
"rewards/accuracies": 1.0,
"rewards/generated": -3.685072422027588,
"rewards/margins": 5.676372051239014,
"rewards/real": 1.9912999868392944,
"step": 3890
},
{
"epoch": 2.5,
"learning_rate": 9.348341232227488e-08,
"logits/generated": 6.578193664550781,
"logits/real": 5.0969648361206055,
"logps/generated": -321.40362548828125,
"logps/real": -244.7572479248047,
"loss": 0.0371,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6856868267059326,
"rewards/margins": 5.648140907287598,
"rewards/real": 1.9624547958374023,
"step": 3900
},
{
"epoch": 2.5,
"learning_rate": 9.229857819905212e-08,
"logits/generated": 6.4852423667907715,
"logits/real": 4.911639213562012,
"logps/generated": -313.97930908203125,
"logps/real": -257.989013671875,
"loss": 0.0382,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.478628635406494,
"rewards/margins": 5.914477348327637,
"rewards/real": 2.435849666595459,
"step": 3910
},
{
"epoch": 2.51,
"learning_rate": 9.111374407582938e-08,
"logits/generated": 6.461939811706543,
"logits/real": 4.53496789932251,
"logps/generated": -324.73724365234375,
"logps/real": -236.884521484375,
"loss": 0.0395,
"rewards/accuracies": 1.0,
"rewards/generated": -3.916443347930908,
"rewards/margins": 5.9058918952941895,
"rewards/real": 1.9894483089447021,
"step": 3920
},
{
"epoch": 2.51,
"learning_rate": 8.992890995260663e-08,
"logits/generated": 6.4986162185668945,
"logits/real": 4.736280918121338,
"logps/generated": -315.95770263671875,
"logps/real": -232.65249633789062,
"loss": 0.0334,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5177340507507324,
"rewards/margins": 5.86563777923584,
"rewards/real": 2.3479039669036865,
"step": 3930
},
{
"epoch": 2.52,
"learning_rate": 8.874407582938389e-08,
"logits/generated": 6.594623565673828,
"logits/real": 4.596661567687988,
"logps/generated": -294.2056579589844,
"logps/real": -208.23135375976562,
"loss": 0.0376,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.553370714187622,
"rewards/margins": 5.874009609222412,
"rewards/real": 2.3206381797790527,
"step": 3940
},
{
"epoch": 2.53,
"learning_rate": 8.755924170616114e-08,
"logits/generated": 6.560601234436035,
"logits/real": 4.890211582183838,
"logps/generated": -320.5434875488281,
"logps/real": -234.8424835205078,
"loss": 0.0322,
"rewards/accuracies": 1.0,
"rewards/generated": -3.717367649078369,
"rewards/margins": 5.8168792724609375,
"rewards/real": 2.0995113849639893,
"step": 3950
},
{
"epoch": 2.53,
"learning_rate": 8.63744075829384e-08,
"logits/generated": 6.643453121185303,
"logits/real": 4.418960094451904,
"logps/generated": -308.0656433105469,
"logps/real": -188.0706787109375,
"loss": 0.0344,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6177737712860107,
"rewards/margins": 6.433516502380371,
"rewards/real": 2.8157434463500977,
"step": 3960
},
{
"epoch": 2.54,
"learning_rate": 8.518957345971564e-08,
"logits/generated": 6.609760284423828,
"logits/real": 4.693791389465332,
"logps/generated": -311.10589599609375,
"logps/real": -222.8950653076172,
"loss": 0.0359,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7364165782928467,
"rewards/margins": 6.025433540344238,
"rewards/real": 2.2890164852142334,
"step": 3970
},
{
"epoch": 2.55,
"learning_rate": 8.40047393364929e-08,
"logits/generated": 6.644488334655762,
"logits/real": 4.728980541229248,
"logps/generated": -326.75518798828125,
"logps/real": -216.3810577392578,
"loss": 0.0326,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6819024085998535,
"rewards/margins": 6.162990570068359,
"rewards/real": 2.4810874462127686,
"step": 3980
},
{
"epoch": 2.55,
"learning_rate": 8.281990521327013e-08,
"logits/generated": 6.576291561126709,
"logits/real": 4.637971878051758,
"logps/generated": -309.2288513183594,
"logps/real": -236.91561889648438,
"loss": 0.0378,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5884525775909424,
"rewards/margins": 5.920731544494629,
"rewards/real": 2.3322794437408447,
"step": 3990
},
{
"epoch": 2.56,
"learning_rate": 8.163507109004738e-08,
"logits/generated": 6.310732841491699,
"logits/real": 4.888187885284424,
"logps/generated": -311.5484924316406,
"logps/real": -224.40628051757812,
"loss": 0.0343,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.938901901245117,
"rewards/margins": 6.3101019859313965,
"rewards/real": 2.3712000846862793,
"step": 4000
},
{
"epoch": 2.57,
"learning_rate": 8.045023696682464e-08,
"logits/generated": 6.325362205505371,
"logits/real": 5.112117767333984,
"logps/generated": -311.65325927734375,
"logps/real": -254.33285522460938,
"loss": 0.0511,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.4007866382598877,
"rewards/margins": 5.50778865814209,
"rewards/real": 2.1070024967193604,
"step": 4010
},
{
"epoch": 2.57,
"learning_rate": 7.926540284360189e-08,
"logits/generated": 6.5289106369018555,
"logits/real": 4.755041599273682,
"logps/generated": -333.82012939453125,
"logps/real": -237.4073486328125,
"loss": 0.0326,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.6312661170959473,
"rewards/margins": 6.053757667541504,
"rewards/real": 2.4224915504455566,
"step": 4020
},
{
"epoch": 2.58,
"learning_rate": 7.808056872037915e-08,
"logits/generated": 6.4816789627075195,
"logits/real": 4.328751087188721,
"logps/generated": -322.64190673828125,
"logps/real": -216.17178344726562,
"loss": 0.0482,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.9149162769317627,
"rewards/margins": 6.450268745422363,
"rewards/real": 2.5353522300720215,
"step": 4030
},
{
"epoch": 2.58,
"learning_rate": 7.689573459715639e-08,
"logits/generated": 6.6160569190979,
"logits/real": 4.253937244415283,
"logps/generated": -321.759765625,
"logps/real": -202.4170379638672,
"loss": 0.0239,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.693296432495117,
"rewards/margins": 5.80830192565918,
"rewards/real": 2.1150054931640625,
"step": 4040
},
{
"epoch": 2.59,
"learning_rate": 7.571090047393365e-08,
"logits/generated": 6.380744934082031,
"logits/real": 4.77320671081543,
"logps/generated": -306.7076110839844,
"logps/real": -220.3052215576172,
"loss": 0.0338,
"rewards/accuracies": 1.0,
"rewards/generated": -3.4694228172302246,
"rewards/margins": 5.889524936676025,
"rewards/real": 2.42010235786438,
"step": 4050
},
{
"epoch": 2.6,
"learning_rate": 7.45260663507109e-08,
"logits/generated": 6.456230163574219,
"logits/real": 4.660184860229492,
"logps/generated": -309.7335510253906,
"logps/real": -223.43399047851562,
"loss": 0.0355,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.4624855518341064,
"rewards/margins": 5.547595500946045,
"rewards/real": 2.0851101875305176,
"step": 4060
},
{
"epoch": 2.6,
"learning_rate": 7.334123222748814e-08,
"logits/generated": 6.5137529373168945,
"logits/real": 4.8226494789123535,
"logps/generated": -320.96771240234375,
"logps/real": -244.80807495117188,
"loss": 0.0315,
"rewards/accuracies": 1.0,
"rewards/generated": -3.9199230670928955,
"rewards/margins": 6.18032693862915,
"rewards/real": 2.2604031562805176,
"step": 4070
},
{
"epoch": 2.61,
"learning_rate": 7.215639810426539e-08,
"logits/generated": 6.472892761230469,
"logits/real": 4.793478012084961,
"logps/generated": -321.29473876953125,
"logps/real": -232.20382690429688,
"loss": 0.0445,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.531625270843506,
"rewards/margins": 6.006019592285156,
"rewards/real": 2.4743950366973877,
"step": 4080
},
{
"epoch": 2.62,
"learning_rate": 7.097156398104265e-08,
"logits/generated": 6.526330471038818,
"logits/real": 4.876141548156738,
"logps/generated": -308.0903625488281,
"logps/real": -241.17123413085938,
"loss": 0.0497,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.8017773628234863,
"rewards/margins": 6.233469486236572,
"rewards/real": 2.431692123413086,
"step": 4090
},
{
"epoch": 2.62,
"learning_rate": 6.97867298578199e-08,
"logits/generated": 6.507188320159912,
"logits/real": 4.934351444244385,
"logps/generated": -313.9234619140625,
"logps/real": -229.16958618164062,
"loss": 0.0412,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.8360989093780518,
"rewards/margins": 5.914695739746094,
"rewards/real": 2.0785973072052,
"step": 4100
},
{
"epoch": 2.63,
"learning_rate": 6.860189573459716e-08,
"logits/generated": 6.5582594871521,
"logits/real": 4.455166339874268,
"logps/generated": -316.6048583984375,
"logps/real": -216.8268280029297,
"loss": 0.0314,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.8321373462677,
"rewards/margins": 6.344393730163574,
"rewards/real": 2.512256383895874,
"step": 4110
},
{
"epoch": 2.64,
"learning_rate": 6.74170616113744e-08,
"logits/generated": 6.617920875549316,
"logits/real": 4.433808326721191,
"logps/generated": -310.179931640625,
"logps/real": -225.44900512695312,
"loss": 0.0409,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6572768688201904,
"rewards/margins": 6.027648448944092,
"rewards/real": 2.3703715801239014,
"step": 4120
},
{
"epoch": 2.64,
"learning_rate": 6.623222748815166e-08,
"logits/generated": 6.532803535461426,
"logits/real": 4.4606032371521,
"logps/generated": -305.09393310546875,
"logps/real": -212.5522918701172,
"loss": 0.0441,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.38942289352417,
"rewards/margins": 5.628909587860107,
"rewards/real": 2.2394864559173584,
"step": 4130
},
{
"epoch": 2.65,
"learning_rate": 6.504739336492891e-08,
"logits/generated": 6.532896995544434,
"logits/real": 4.811502456665039,
"logps/generated": -321.75567626953125,
"logps/real": -236.20590209960938,
"loss": 0.0334,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.9196619987487793,
"rewards/margins": 6.389462471008301,
"rewards/real": 2.4698009490966797,
"step": 4140
},
{
"epoch": 2.66,
"learning_rate": 6.386255924170615e-08,
"logits/generated": 6.573578834533691,
"logits/real": 4.610198497772217,
"logps/generated": -330.74285888671875,
"logps/real": -208.0010986328125,
"loss": 0.0436,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.901912212371826,
"rewards/margins": 6.33354377746582,
"rewards/real": 2.431631326675415,
"step": 4150
},
{
"epoch": 2.66,
"learning_rate": 6.26777251184834e-08,
"logits/generated": 6.6127519607543945,
"logits/real": 4.326380252838135,
"logps/generated": -313.0176696777344,
"logps/real": -204.0683135986328,
"loss": 0.0283,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7593460083007812,
"rewards/margins": 6.059396266937256,
"rewards/real": 2.3000504970550537,
"step": 4160
},
{
"epoch": 2.67,
"learning_rate": 6.149289099526066e-08,
"logits/generated": 6.535134792327881,
"logits/real": 4.897824287414551,
"logps/generated": -308.34429931640625,
"logps/real": -220.6827392578125,
"loss": 0.0532,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -3.668942928314209,
"rewards/margins": 5.9941911697387695,
"rewards/real": 2.325247287750244,
"step": 4170
},
{
"epoch": 2.67,
"learning_rate": 6.030805687203791e-08,
"logits/generated": 6.5091376304626465,
"logits/real": 4.820844650268555,
"logps/generated": -324.505859375,
"logps/real": -224.4438018798828,
"loss": 0.0174,
"rewards/accuracies": 1.0,
"rewards/generated": -3.988135814666748,
"rewards/margins": 6.7851080894470215,
"rewards/real": 2.7969725131988525,
"step": 4180
},
{
"epoch": 2.68,
"learning_rate": 5.912322274881516e-08,
"logits/generated": 6.506842136383057,
"logits/real": 4.628279685974121,
"logps/generated": -311.8347473144531,
"logps/real": -239.9702606201172,
"loss": 0.0313,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3794751167297363,
"rewards/margins": 5.759786605834961,
"rewards/real": 2.380312204360962,
"step": 4190
},
{
"epoch": 2.69,
"learning_rate": 5.793838862559241e-08,
"logits/generated": 6.515402793884277,
"logits/real": 4.95352840423584,
"logps/generated": -318.76495361328125,
"logps/real": -234.7864227294922,
"loss": 0.0425,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.8171284198760986,
"rewards/margins": 6.229681968688965,
"rewards/real": 2.412553071975708,
"step": 4200
},
{
"epoch": 2.69,
"learning_rate": 5.6753554502369666e-08,
"logits/generated": 6.587221622467041,
"logits/real": 4.416947364807129,
"logps/generated": -324.5755310058594,
"logps/real": -213.75076293945312,
"loss": 0.0404,
"rewards/accuracies": 1.0,
"rewards/generated": -3.916365146636963,
"rewards/margins": 6.1400957107543945,
"rewards/real": 2.22373104095459,
"step": 4210
},
{
"epoch": 2.7,
"learning_rate": 5.556872037914691e-08,
"logits/generated": 6.564489841461182,
"logits/real": 4.871767997741699,
"logps/generated": -308.17706298828125,
"logps/real": -221.3224639892578,
"loss": 0.0365,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.5159783363342285,
"rewards/margins": 5.63589334487915,
"rewards/real": 2.11991548538208,
"step": 4220
},
{
"epoch": 2.71,
"learning_rate": 5.4383886255924165e-08,
"logits/generated": 6.402759552001953,
"logits/real": 4.591031074523926,
"logps/generated": -328.0219421386719,
"logps/real": -236.8424072265625,
"loss": 0.0356,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7980494499206543,
"rewards/margins": 6.346069812774658,
"rewards/real": 2.5480198860168457,
"step": 4230
},
{
"epoch": 2.71,
"learning_rate": 5.319905213270142e-08,
"logits/generated": 6.591435432434082,
"logits/real": 4.606555938720703,
"logps/generated": -303.12579345703125,
"logps/real": -228.23226928710938,
"loss": 0.0447,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5328078269958496,
"rewards/margins": 6.194054126739502,
"rewards/real": 2.6612462997436523,
"step": 4240
},
{
"epoch": 2.72,
"learning_rate": 5.201421800947867e-08,
"logits/generated": 6.483080863952637,
"logits/real": 4.758042335510254,
"logps/generated": -307.1785583496094,
"logps/real": -221.1710662841797,
"loss": 0.0277,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7217071056365967,
"rewards/margins": 6.27499532699585,
"rewards/real": 2.553287982940674,
"step": 4250
},
{
"epoch": 2.73,
"learning_rate": 5.082938388625592e-08,
"logits/generated": 6.502976417541504,
"logits/real": 4.846578121185303,
"logps/generated": -317.02081298828125,
"logps/real": -225.8799591064453,
"loss": 0.037,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6737067699432373,
"rewards/margins": 5.691414833068848,
"rewards/real": 2.0177078247070312,
"step": 4260
},
{
"epoch": 2.73,
"learning_rate": 4.964454976303317e-08,
"logits/generated": 6.714502811431885,
"logits/real": 4.4899725914001465,
"logps/generated": -320.1687316894531,
"logps/real": -220.10842895507812,
"loss": 0.032,
"rewards/accuracies": 1.0,
"rewards/generated": -3.954611301422119,
"rewards/margins": 6.4107770919799805,
"rewards/real": 2.4561660289764404,
"step": 4270
},
{
"epoch": 2.74,
"learning_rate": 4.845971563981042e-08,
"logits/generated": 6.47867488861084,
"logits/real": 4.510740756988525,
"logps/generated": -313.6661682128906,
"logps/real": -206.0166015625,
"loss": 0.0378,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6164774894714355,
"rewards/margins": 5.649336814880371,
"rewards/real": 2.0328593254089355,
"step": 4280
},
{
"epoch": 2.74,
"learning_rate": 4.7274881516587676e-08,
"logits/generated": 6.634936332702637,
"logits/real": 4.846175670623779,
"logps/generated": -327.55364990234375,
"logps/real": -215.91610717773438,
"loss": 0.0275,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.14184045791626,
"rewards/margins": 6.476358890533447,
"rewards/real": 2.3345181941986084,
"step": 4290
},
{
"epoch": 2.75,
"learning_rate": 4.609004739336492e-08,
"logits/generated": 6.581854820251465,
"logits/real": 4.857382774353027,
"logps/generated": -305.4502258300781,
"logps/real": -209.5597381591797,
"loss": 0.0448,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.655374526977539,
"rewards/margins": 6.246061325073242,
"rewards/real": 2.5906870365142822,
"step": 4300
},
{
"epoch": 2.76,
"learning_rate": 4.4905213270142176e-08,
"logits/generated": 6.5722198486328125,
"logits/real": 4.7393646240234375,
"logps/generated": -326.43719482421875,
"logps/real": -233.70809936523438,
"loss": 0.0363,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.8187355995178223,
"rewards/margins": 6.1232428550720215,
"rewards/real": 2.3045077323913574,
"step": 4310
},
{
"epoch": 2.76,
"learning_rate": 4.372037914691943e-08,
"logits/generated": 6.555207252502441,
"logits/real": 4.758434295654297,
"logps/generated": -326.94232177734375,
"logps/real": -235.101806640625,
"loss": 0.025,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6171951293945312,
"rewards/margins": 6.253279685974121,
"rewards/real": 2.636084794998169,
"step": 4320
},
{
"epoch": 2.77,
"learning_rate": 4.253554502369668e-08,
"logits/generated": 6.525106906890869,
"logits/real": 5.279546737670898,
"logps/generated": -331.38818359375,
"logps/real": -274.1283264160156,
"loss": 0.0272,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6407923698425293,
"rewards/margins": 6.059579372406006,
"rewards/real": 2.4187867641448975,
"step": 4330
},
{
"epoch": 2.78,
"learning_rate": 4.135071090047393e-08,
"logits/generated": 6.604770660400391,
"logits/real": 4.898898601531982,
"logps/generated": -330.6890563964844,
"logps/real": -254.55038452148438,
"loss": 0.0362,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.8300297260284424,
"rewards/margins": 6.490457057952881,
"rewards/real": 2.6604273319244385,
"step": 4340
},
{
"epoch": 2.78,
"learning_rate": 4.016587677725118e-08,
"logits/generated": 6.4878034591674805,
"logits/real": 4.653387069702148,
"logps/generated": -306.11895751953125,
"logps/real": -233.1724395751953,
"loss": 0.0334,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7691116333007812,
"rewards/margins": 6.210760593414307,
"rewards/real": 2.441648006439209,
"step": 4350
},
{
"epoch": 2.79,
"learning_rate": 3.8981042654028434e-08,
"logits/generated": 6.604249477386475,
"logits/real": 4.4642558097839355,
"logps/generated": -328.152099609375,
"logps/real": -215.21151733398438,
"loss": 0.0333,
"rewards/accuracies": 1.0,
"rewards/generated": -3.84818696975708,
"rewards/margins": 6.3337178230285645,
"rewards/real": 2.485531806945801,
"step": 4360
},
{
"epoch": 2.8,
"learning_rate": 3.779620853080569e-08,
"logits/generated": 6.567579746246338,
"logits/real": 4.3342695236206055,
"logps/generated": -319.4460144042969,
"logps/real": -200.232177734375,
"loss": 0.0331,
"rewards/accuracies": 1.0,
"rewards/generated": -3.974278211593628,
"rewards/margins": 6.716505527496338,
"rewards/real": 2.7422266006469727,
"step": 4370
},
{
"epoch": 2.8,
"learning_rate": 3.661137440758294e-08,
"logits/generated": 6.469827175140381,
"logits/real": 4.755372047424316,
"logps/generated": -321.1123046875,
"logps/real": -220.9445343017578,
"loss": 0.0342,
"rewards/accuracies": 1.0,
"rewards/generated": -3.819784641265869,
"rewards/margins": 6.050178050994873,
"rewards/real": 2.230393171310425,
"step": 4380
},
{
"epoch": 2.81,
"learning_rate": 3.5426540284360186e-08,
"logits/generated": 6.43978214263916,
"logits/real": 5.01400899887085,
"logps/generated": -317.6650390625,
"logps/real": -221.50173950195312,
"loss": 0.0359,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7374751567840576,
"rewards/margins": 6.025723934173584,
"rewards/real": 2.2882485389709473,
"step": 4390
},
{
"epoch": 2.82,
"learning_rate": 3.424170616113744e-08,
"logits/generated": 6.483642578125,
"logits/real": 4.6836934089660645,
"logps/generated": -328.49017333984375,
"logps/real": -233.32470703125,
"loss": 0.0285,
"rewards/accuracies": 1.0,
"rewards/generated": -4.040391445159912,
"rewards/margins": 6.6246747970581055,
"rewards/real": 2.5842835903167725,
"step": 4400
},
{
"epoch": 2.82,
"learning_rate": 3.305687203791469e-08,
"logits/generated": 6.557009220123291,
"logits/real": 4.679731369018555,
"logps/generated": -314.7176513671875,
"logps/real": -220.220458984375,
"loss": 0.0218,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6416420936584473,
"rewards/margins": 6.076503753662109,
"rewards/real": 2.4348621368408203,
"step": 4410
},
{
"epoch": 2.83,
"learning_rate": 3.1872037914691945e-08,
"logits/generated": 6.50725793838501,
"logits/real": 4.735629558563232,
"logps/generated": -319.73260498046875,
"logps/real": -205.85287475585938,
"loss": 0.0332,
"rewards/accuracies": 1.0,
"rewards/generated": -3.5968334674835205,
"rewards/margins": 5.893436908721924,
"rewards/real": 2.2966036796569824,
"step": 4420
},
{
"epoch": 2.83,
"learning_rate": 3.068720379146919e-08,
"logits/generated": 6.5144171714782715,
"logits/real": 4.513826847076416,
"logps/generated": -318.6091613769531,
"logps/real": -218.3349151611328,
"loss": 0.041,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7594285011291504,
"rewards/margins": 6.054383754730225,
"rewards/real": 2.2949557304382324,
"step": 4430
},
{
"epoch": 2.84,
"learning_rate": 2.9502369668246444e-08,
"logits/generated": 6.47725772857666,
"logits/real": 4.8379034996032715,
"logps/generated": -318.28778076171875,
"logps/real": -225.16415405273438,
"loss": 0.0263,
"rewards/accuracies": 1.0,
"rewards/generated": -3.90997052192688,
"rewards/margins": 6.520627021789551,
"rewards/real": 2.6106560230255127,
"step": 4440
},
{
"epoch": 2.85,
"learning_rate": 2.8317535545023697e-08,
"logits/generated": 6.458238124847412,
"logits/real": 4.9540228843688965,
"logps/generated": -315.47100830078125,
"logps/real": -216.9081268310547,
"loss": 0.0361,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.684654712677002,
"rewards/margins": 6.162962436676025,
"rewards/real": 2.4783077239990234,
"step": 4450
},
{
"epoch": 2.85,
"learning_rate": 2.7132701421800947e-08,
"logits/generated": 6.424310207366943,
"logits/real": 4.891389846801758,
"logps/generated": -316.9339904785156,
"logps/real": -235.1954345703125,
"loss": 0.0367,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7844338417053223,
"rewards/margins": 6.09124755859375,
"rewards/real": 2.306814193725586,
"step": 4460
},
{
"epoch": 2.86,
"learning_rate": 2.59478672985782e-08,
"logits/generated": 6.582629203796387,
"logits/real": 4.89013671875,
"logps/generated": -328.97601318359375,
"logps/real": -229.2420654296875,
"loss": 0.0387,
"rewards/accuracies": 1.0,
"rewards/generated": -3.919386386871338,
"rewards/margins": 6.390198707580566,
"rewards/real": 2.4708125591278076,
"step": 4470
},
{
"epoch": 2.87,
"learning_rate": 2.476303317535545e-08,
"logits/generated": 6.474552154541016,
"logits/real": 4.8046979904174805,
"logps/generated": -309.959716796875,
"logps/real": -234.17251586914062,
"loss": 0.0466,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -3.949699878692627,
"rewards/margins": 6.123934745788574,
"rewards/real": 2.1742348670959473,
"step": 4480
},
{
"epoch": 2.87,
"learning_rate": 2.3578199052132702e-08,
"logits/generated": 6.575322151184082,
"logits/real": 4.4408369064331055,
"logps/generated": -315.2879333496094,
"logps/real": -226.00564575195312,
"loss": 0.0441,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.301480770111084,
"rewards/margins": 5.636345863342285,
"rewards/real": 2.3348641395568848,
"step": 4490
},
{
"epoch": 2.88,
"learning_rate": 2.239336492890995e-08,
"logits/generated": 6.517224311828613,
"logits/real": 4.958826065063477,
"logps/generated": -314.48382568359375,
"logps/real": -232.69461059570312,
"loss": 0.0376,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.824894428253174,
"rewards/margins": 6.043306827545166,
"rewards/real": 2.2184131145477295,
"step": 4500
},
{
"epoch": 2.89,
"learning_rate": 2.1208530805687202e-08,
"logits/generated": 6.625432014465332,
"logits/real": 4.315027713775635,
"logps/generated": -319.8591003417969,
"logps/real": -203.87966918945312,
"loss": 0.0411,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.930284023284912,
"rewards/margins": 6.055028915405273,
"rewards/real": 2.1247451305389404,
"step": 4510
},
{
"epoch": 2.89,
"learning_rate": 2.002369668246445e-08,
"logits/generated": 6.50540828704834,
"logits/real": 5.131866455078125,
"logps/generated": -318.6473693847656,
"logps/real": -250.11904907226562,
"loss": 0.0387,
"rewards/accuracies": 1.0,
"rewards/generated": -3.9697766304016113,
"rewards/margins": 5.962163925170898,
"rewards/real": 1.9923874139785767,
"step": 4520
},
{
"epoch": 2.9,
"learning_rate": 1.8838862559241704e-08,
"logits/generated": 6.527606964111328,
"logits/real": 4.929044723510742,
"logps/generated": -307.16680908203125,
"logps/real": -220.23678588867188,
"loss": 0.0262,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.7975895404815674,
"rewards/margins": 6.474527835845947,
"rewards/real": 2.67693829536438,
"step": 4530
},
{
"epoch": 2.9,
"learning_rate": 1.7654028436018954e-08,
"logits/generated": 6.473954677581787,
"logits/real": 4.672419548034668,
"logps/generated": -316.3739013671875,
"logps/real": -218.72024536132812,
"loss": 0.0253,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7095909118652344,
"rewards/margins": 6.246824741363525,
"rewards/real": 2.537233829498291,
"step": 4540
},
{
"epoch": 2.91,
"learning_rate": 1.6469194312796207e-08,
"logits/generated": 6.356590747833252,
"logits/real": 5.3553056716918945,
"logps/generated": -313.9323425292969,
"logps/real": -238.56777954101562,
"loss": 0.0336,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.027235984802246,
"rewards/margins": 6.5537543296813965,
"rewards/real": 2.5265183448791504,
"step": 4550
},
{
"epoch": 2.92,
"learning_rate": 1.528436018957346e-08,
"logits/generated": 6.637836456298828,
"logits/real": 4.594275951385498,
"logps/generated": -310.762939453125,
"logps/real": -227.982177734375,
"loss": 0.0364,
"rewards/accuracies": 1.0,
"rewards/generated": -3.674046754837036,
"rewards/margins": 6.259788990020752,
"rewards/real": 2.585742473602295,
"step": 4560
},
{
"epoch": 2.92,
"learning_rate": 1.409952606635071e-08,
"logits/generated": 6.587820529937744,
"logits/real": 4.828711032867432,
"logps/generated": -322.9399719238281,
"logps/real": -231.203857421875,
"loss": 0.0363,
"rewards/accuracies": 1.0,
"rewards/generated": -3.4187331199645996,
"rewards/margins": 5.429445266723633,
"rewards/real": 2.0107123851776123,
"step": 4570
},
{
"epoch": 2.93,
"learning_rate": 1.2914691943127961e-08,
"logits/generated": 6.576046943664551,
"logits/real": 4.6913862228393555,
"logps/generated": -310.24871826171875,
"logps/real": -215.4926300048828,
"loss": 0.0441,
"rewards/accuracies": 1.0,
"rewards/generated": -3.60530424118042,
"rewards/margins": 5.837168216705322,
"rewards/real": 2.2318644523620605,
"step": 4580
},
{
"epoch": 2.94,
"learning_rate": 1.1729857819905212e-08,
"logits/generated": 6.390922546386719,
"logits/real": 5.047102451324463,
"logps/generated": -316.705322265625,
"logps/real": -221.8660125732422,
"loss": 0.0365,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.7050914764404297,
"rewards/margins": 6.110095500946045,
"rewards/real": 2.4050049781799316,
"step": 4590
},
{
"epoch": 2.94,
"learning_rate": 1.0545023696682464e-08,
"logits/generated": 6.420653343200684,
"logits/real": 4.74511194229126,
"logps/generated": -317.0543212890625,
"logps/real": -218.3509979248047,
"loss": 0.0397,
"rewards/accuracies": 1.0,
"rewards/generated": -3.9743943214416504,
"rewards/margins": 6.493072509765625,
"rewards/real": 2.5186777114868164,
"step": 4600
},
{
"epoch": 2.95,
"learning_rate": 9.360189573459715e-09,
"logits/generated": 6.5571417808532715,
"logits/real": 4.7767133712768555,
"logps/generated": -309.4183349609375,
"logps/real": -237.371826171875,
"loss": 0.0329,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3121707439422607,
"rewards/margins": 5.517508506774902,
"rewards/real": 2.2053380012512207,
"step": 4610
},
{
"epoch": 2.96,
"learning_rate": 8.175355450236966e-09,
"logits/generated": 6.605482578277588,
"logits/real": 4.707463264465332,
"logps/generated": -327.2000427246094,
"logps/real": -206.5057373046875,
"loss": 0.0279,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.9358227252960205,
"rewards/margins": 6.49956750869751,
"rewards/real": 2.56374454498291,
"step": 4620
},
{
"epoch": 2.96,
"learning_rate": 6.990521327014218e-09,
"logits/generated": 6.495814323425293,
"logits/real": 4.9706573486328125,
"logps/generated": -296.98992919921875,
"logps/real": -226.7158966064453,
"loss": 0.0287,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.4257378578186035,
"rewards/margins": 5.643680572509766,
"rewards/real": 2.217942714691162,
"step": 4630
},
{
"epoch": 2.97,
"learning_rate": 5.805687203791469e-09,
"logits/generated": 6.507977485656738,
"logits/real": 4.830941200256348,
"logps/generated": -309.84075927734375,
"logps/real": -210.8922119140625,
"loss": 0.0384,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.37202525138855,
"rewards/margins": 5.505019187927246,
"rewards/real": 2.1329941749572754,
"step": 4640
},
{
"epoch": 2.98,
"learning_rate": 4.62085308056872e-09,
"logits/generated": 6.521729469299316,
"logits/real": 4.934253215789795,
"logps/generated": -309.5820007324219,
"logps/real": -222.89987182617188,
"loss": 0.0527,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6575589179992676,
"rewards/margins": 5.68410062789917,
"rewards/real": 2.0265424251556396,
"step": 4650
},
{
"epoch": 2.98,
"learning_rate": 3.4360189573459714e-09,
"logits/generated": 6.571385860443115,
"logits/real": 4.746085166931152,
"logps/generated": -322.11639404296875,
"logps/real": -239.91476440429688,
"loss": 0.0411,
"rewards/accuracies": 1.0,
"rewards/generated": -3.9839770793914795,
"rewards/margins": 6.489884376525879,
"rewards/real": 2.5059072971343994,
"step": 4660
},
{
"epoch": 2.99,
"learning_rate": 2.2511848341232227e-09,
"logits/generated": 6.565645694732666,
"logits/real": 4.96927547454834,
"logps/generated": -318.505859375,
"logps/real": -256.3047180175781,
"loss": 0.0367,
"rewards/accuracies": 1.0,
"rewards/generated": -3.7661774158477783,
"rewards/margins": 5.747965335845947,
"rewards/real": 1.981787919998169,
"step": 4670
},
{
"epoch": 2.99,
"learning_rate": 1.0663507109004738e-09,
"logits/generated": 6.504978179931641,
"logits/real": 4.623128414154053,
"logps/generated": -328.41619873046875,
"logps/real": -214.21871948242188,
"loss": 0.0385,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.7540199756622314,
"rewards/margins": 5.994838237762451,
"rewards/real": 2.2408182621002197,
"step": 4680
},
{
"epoch": 3.0,
"step": 4689,
"total_flos": 0.0,
"train_loss": 0.16397903456657928,
"train_runtime": 29965.1509,
"train_samples_per_second": 5.006,
"train_steps_per_second": 0.156
}
],
"logging_steps": 10,
"max_steps": 4689,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}