{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 4689, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.066098081023454e-09, "logits/generated": 6.313449382781982, "logits/real": 4.503366947174072, "logps/generated": -273.765380859375, "logps/real": -221.5892333984375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.0660980810234541e-08, "logits/generated": 6.417145729064941, "logits/real": 4.833721160888672, "logps/generated": -281.4638366699219, "logps/real": -251.9319610595703, "loss": 0.6946, "rewards/accuracies": 0.0416666679084301, "rewards/generated": 0.0008282132912427187, "rewards/margins": -0.0019732369109988213, "rewards/real": -0.0011450237361714244, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.1321961620469082e-08, "logits/generated": 6.508301734924316, "logits/real": 4.467240333557129, "logps/generated": -281.99346923828125, "logps/real": -244.68807983398438, "loss": 0.6907, "rewards/accuracies": 0.42500001192092896, "rewards/generated": -0.01464166771620512, "rewards/margins": 0.011996939778327942, "rewards/real": -0.0026447249110788107, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.1982942430703625e-08, "logits/generated": 6.402890682220459, "logits/real": 4.9113664627075195, "logps/generated": -280.8014831542969, "logps/real": -265.4349365234375, "loss": 0.6959, "rewards/accuracies": 0.5874999761581421, "rewards/generated": 0.0067442902363836765, "rewards/margins": 0.00477126520127058, "rewards/real": 0.011515555903315544, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.2643923240938164e-08, "logits/generated": 6.590306758880615, "logits/real": 4.578217029571533, "logps/generated": -270.75592041015625, "logps/real": -240.4984130859375, "loss": 0.7, "rewards/accuracies": 0.512499988079071, "rewards/generated": 0.0005118753761053085, "rewards/margins": -0.0064646475948393345, "rewards/real": -0.005952772684395313, "step": 40 }, { "epoch": 0.03, "learning_rate": 5.3304904051172704e-08, "logits/generated": 6.504673957824707, "logits/real": 4.641873359680176, "logps/generated": -276.2430419921875, "logps/real": -224.84249877929688, "loss": 0.6889, "rewards/accuracies": 0.512499988079071, "rewards/generated": -0.033875368535518646, "rewards/margins": 0.02717522345483303, "rewards/real": -0.006700146943330765, "step": 50 }, { "epoch": 0.04, "learning_rate": 6.396588486140725e-08, "logits/generated": 6.473546504974365, "logits/real": 5.055091381072998, "logps/generated": -289.09674072265625, "logps/real": -243.2639923095703, "loss": 0.6977, "rewards/accuracies": 0.574999988079071, "rewards/generated": -0.015207557007670403, "rewards/margins": 0.020673025399446487, "rewards/real": 0.00546546746045351, "step": 60 }, { "epoch": 0.04, "learning_rate": 7.462686567164178e-08, "logits/generated": 6.487510681152344, "logits/real": 4.849614143371582, "logps/generated": -283.6287536621094, "logps/real": -268.0138854980469, "loss": 0.6988, "rewards/accuracies": 0.5, "rewards/generated": -0.020940685644745827, "rewards/margins": 0.0006496586138382554, "rewards/real": -0.020291026681661606, "step": 70 }, { "epoch": 0.05, "learning_rate": 8.528784648187633e-08, "logits/generated": 6.551278114318848, "logits/real": 4.523703575134277, "logps/generated": -283.064697265625, "logps/real": -237.7630157470703, "loss": 0.6961, "rewards/accuracies": 0.4625000059604645, "rewards/generated": -0.003950852435082197, "rewards/margins": 0.003412533551454544, "rewards/real": -0.0005383208626881242, "step": 80 }, { "epoch": 0.06, "learning_rate": 9.594882729211087e-08, "logits/generated": 6.489449501037598, "logits/real": 4.616640090942383, "logps/generated": -294.86614990234375, "logps/real": -257.0780029296875, "loss": 0.6965, "rewards/accuracies": 0.512499988079071, "rewards/generated": -0.001603059470653534, "rewards/margins": -0.0017856752965599298, "rewards/real": -0.0033887363970279694, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.0660980810234541e-07, "logits/generated": 6.500540256500244, "logits/real": 4.6778364181518555, "logps/generated": -280.27447509765625, "logps/real": -240.3990020751953, "loss": 0.6985, "rewards/accuracies": 0.48750001192092896, "rewards/generated": 0.01377450954169035, "rewards/margins": -0.026643192395567894, "rewards/real": -0.01286868192255497, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.1727078891257995e-07, "logits/generated": 6.49435567855835, "logits/real": 4.850351810455322, "logps/generated": -284.07623291015625, "logps/real": -256.1934814453125, "loss": 0.699, "rewards/accuracies": 0.5249999761581421, "rewards/generated": 0.002641477854922414, "rewards/margins": 0.004401583690196276, "rewards/real": 0.007043060846626759, "step": 110 }, { "epoch": 0.08, "learning_rate": 1.279317697228145e-07, "logits/generated": 6.4412078857421875, "logits/real": 4.881103038787842, "logps/generated": -281.96685791015625, "logps/real": -240.1349334716797, "loss": 0.7007, "rewards/accuracies": 0.5375000238418579, "rewards/generated": -0.010973912663757801, "rewards/margins": -0.00332402135245502, "rewards/real": -0.014297932386398315, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.3859275053304903e-07, "logits/generated": 6.572972297668457, "logits/real": 4.993831634521484, "logps/generated": -278.8970031738281, "logps/real": -253.2180633544922, "loss": 0.6918, "rewards/accuracies": 0.5625, "rewards/generated": -0.0238101277500391, "rewards/margins": 0.015025329776108265, "rewards/real": -0.00878479890525341, "step": 130 }, { "epoch": 0.09, "learning_rate": 1.4925373134328355e-07, "logits/generated": 6.496808052062988, "logits/real": 4.527266979217529, "logps/generated": -286.42230224609375, "logps/real": -238.97653198242188, "loss": 0.6908, "rewards/accuracies": 0.550000011920929, "rewards/generated": -0.015367841348052025, "rewards/margins": 0.015413427725434303, "rewards/real": 4.558637738227844e-05, "step": 140 }, { "epoch": 0.1, "learning_rate": 1.5991471215351813e-07, "logits/generated": 6.386403560638428, "logits/real": 5.1331682205200195, "logps/generated": -288.5858459472656, "logps/real": -252.1357879638672, "loss": 0.6973, "rewards/accuracies": 0.4749999940395355, "rewards/generated": 0.001897630630992353, "rewards/margins": -0.013987990096211433, "rewards/real": -0.012090359814465046, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.7057569296375266e-07, "logits/generated": 6.469916343688965, "logits/real": 4.803389549255371, "logps/generated": -279.75665283203125, "logps/real": -249.20278930664062, "loss": 0.6928, "rewards/accuracies": 0.4749999940395355, "rewards/generated": -0.00042929648770950735, "rewards/margins": -0.006332563702017069, "rewards/real": -0.0067618610337376595, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.8123667377398718e-07, "logits/generated": 6.469520568847656, "logits/real": 4.792954444885254, "logps/generated": -278.6644592285156, "logps/real": -238.6741485595703, "loss": 0.694, "rewards/accuracies": 0.550000011920929, "rewards/generated": -0.006913432385772467, "rewards/margins": 0.017923034727573395, "rewards/real": 0.01100960187613964, "step": 170 }, { "epoch": 0.12, "learning_rate": 1.9189765458422174e-07, "logits/generated": 6.304480075836182, "logits/real": 5.01064395904541, "logps/generated": -292.3460388183594, "logps/real": -263.7362976074219, "loss": 0.7018, "rewards/accuracies": 0.44999998807907104, "rewards/generated": 0.00147080363240093, "rewards/margins": -0.01612243428826332, "rewards/real": -0.014651629142463207, "step": 180 }, { "epoch": 0.12, "learning_rate": 2.025586353944563e-07, "logits/generated": 6.497210502624512, "logits/real": 4.78942346572876, "logps/generated": -282.570068359375, "logps/real": -263.93743896484375, "loss": 0.6982, "rewards/accuracies": 0.4375, "rewards/generated": 0.003275098744779825, "rewards/margins": -0.005347794853150845, "rewards/real": -0.0020726968068629503, "step": 190 }, { "epoch": 0.13, "learning_rate": 2.1321961620469082e-07, "logits/generated": 6.557346343994141, "logits/real": 4.690012454986572, "logps/generated": -289.92633056640625, "logps/real": -247.98934936523438, "loss": 0.6967, "rewards/accuracies": 0.5, "rewards/generated": -0.004312173463404179, "rewards/margins": 0.004754967056214809, "rewards/real": 0.00044279481517151, "step": 200 }, { "epoch": 0.13, "learning_rate": 2.2388059701492537e-07, "logits/generated": 6.627219200134277, "logits/real": 4.5740742683410645, "logps/generated": -274.96441650390625, "logps/real": -227.3006591796875, "loss": 0.6883, "rewards/accuracies": 0.550000011920929, "rewards/generated": -0.019664818421006203, "rewards/margins": 0.017926085740327835, "rewards/real": -0.0017387343104928732, "step": 210 }, { "epoch": 0.14, "learning_rate": 2.345415778251599e-07, "logits/generated": 6.484269618988037, "logits/real": 4.570880889892578, "logps/generated": -285.11175537109375, "logps/real": -222.089111328125, "loss": 0.6939, "rewards/accuracies": 0.48750001192092896, "rewards/generated": -0.00420150812715292, "rewards/margins": -0.003182200016453862, "rewards/real": -0.007383708842098713, "step": 220 }, { "epoch": 0.15, "learning_rate": 2.452025586353944e-07, "logits/generated": 6.622496128082275, "logits/real": 4.402390480041504, "logps/generated": -283.72247314453125, "logps/real": -238.875244140625, "loss": 0.6884, "rewards/accuracies": 0.625, "rewards/generated": -0.0316481851041317, "rewards/margins": 0.037973009049892426, "rewards/real": 0.00632482161745429, "step": 230 }, { "epoch": 0.15, "learning_rate": 2.55863539445629e-07, "logits/generated": 6.519092559814453, "logits/real": 5.1569108963012695, "logps/generated": -294.76727294921875, "logps/real": -287.53533935546875, "loss": 0.6922, "rewards/accuracies": 0.5375000238418579, "rewards/generated": -0.0013995547778904438, "rewards/margins": 0.0036062332801520824, "rewards/real": 0.002206678967922926, "step": 240 }, { "epoch": 0.16, "learning_rate": 2.665245202558635e-07, "logits/generated": 6.4200615882873535, "logits/real": 4.65548038482666, "logps/generated": -285.3873596191406, "logps/real": -228.32009887695312, "loss": 0.6837, "rewards/accuracies": 0.512499988079071, "rewards/generated": 0.003386992961168289, "rewards/margins": 0.010126499459147453, "rewards/real": 0.013513492420315742, "step": 250 }, { "epoch": 0.17, "learning_rate": 2.7718550106609805e-07, "logits/generated": 6.4657487869262695, "logits/real": 4.301976203918457, "logps/generated": -280.96539306640625, "logps/real": -224.1595458984375, "loss": 0.6919, "rewards/accuracies": 0.550000011920929, "rewards/generated": -0.006485415156930685, "rewards/margins": 0.011584701016545296, "rewards/real": 0.005099285393953323, "step": 260 }, { "epoch": 0.17, "learning_rate": 2.878464818763326e-07, "logits/generated": 6.452627658843994, "logits/real": 4.714515209197998, "logps/generated": -274.6763000488281, "logps/real": -239.6040802001953, "loss": 0.6897, "rewards/accuracies": 0.5625, "rewards/generated": -0.011514711193740368, "rewards/margins": 0.023869235068559647, "rewards/real": 0.012354524806141853, "step": 270 }, { "epoch": 0.18, "learning_rate": 2.985074626865671e-07, "logits/generated": 6.498050689697266, "logits/real": 4.76090669631958, "logps/generated": -270.68817138671875, "logps/real": -256.23040771484375, "loss": 0.6875, "rewards/accuracies": 0.6000000238418579, "rewards/generated": -0.023040171712636948, "rewards/margins": 0.00843932293355465, "rewards/real": -0.014600845053792, "step": 280 }, { "epoch": 0.19, "learning_rate": 3.0916844349680174e-07, "logits/generated": 6.579047203063965, "logits/real": 4.5393829345703125, "logps/generated": -292.82879638671875, "logps/real": -251.0160369873047, "loss": 0.6915, "rewards/accuracies": 0.5249999761581421, "rewards/generated": -0.0038245960604399443, "rewards/margins": 0.011936083436012268, "rewards/real": 0.008111484348773956, "step": 290 }, { "epoch": 0.19, "learning_rate": 3.1982942430703626e-07, "logits/generated": 6.381264686584473, "logits/real": 4.537992477416992, "logps/generated": -273.74224853515625, "logps/real": -239.7407684326172, "loss": 0.684, "rewards/accuracies": 0.512499988079071, "rewards/generated": -0.004199598915874958, "rewards/margins": 0.017961198464035988, "rewards/real": 0.01376159768551588, "step": 300 }, { "epoch": 0.2, "learning_rate": 3.304904051172708e-07, "logits/generated": 6.469088554382324, "logits/real": 4.945279598236084, "logps/generated": -275.0647277832031, "logps/real": -260.91900634765625, "loss": 0.6837, "rewards/accuracies": 0.625, "rewards/generated": -0.0022400771267712116, "rewards/margins": 0.012093605473637581, "rewards/real": 0.009853528812527657, "step": 310 }, { "epoch": 0.2, "learning_rate": 3.411513859275053e-07, "logits/generated": 6.3869123458862305, "logits/real": 4.953930854797363, "logps/generated": -277.23260498046875, "logps/real": -258.80120849609375, "loss": 0.685, "rewards/accuracies": 0.637499988079071, "rewards/generated": 0.0009156037122011185, "rewards/margins": 0.01676994189620018, "rewards/real": 0.01768554374575615, "step": 320 }, { "epoch": 0.21, "learning_rate": 3.5181236673773984e-07, "logits/generated": 6.399374961853027, "logits/real": 4.842419147491455, "logps/generated": -270.574951171875, "logps/real": -259.90386962890625, "loss": 0.6783, "rewards/accuracies": 0.6625000238418579, "rewards/generated": -0.014367667958140373, "rewards/margins": 0.042558759450912476, "rewards/real": 0.0281910952180624, "step": 330 }, { "epoch": 0.22, "learning_rate": 3.6247334754797437e-07, "logits/generated": 6.49670934677124, "logits/real": 4.750965118408203, "logps/generated": -281.72406005859375, "logps/real": -254.224853515625, "loss": 0.6784, "rewards/accuracies": 0.574999988079071, "rewards/generated": 0.0038049505092203617, "rewards/margins": 0.02935839258134365, "rewards/real": 0.03316333517432213, "step": 340 }, { "epoch": 0.22, "learning_rate": 3.7313432835820895e-07, "logits/generated": 6.555943965911865, "logits/real": 4.749678134918213, "logps/generated": -271.36529541015625, "logps/real": -246.26235961914062, "loss": 0.6711, "rewards/accuracies": 0.6499999761581421, "rewards/generated": -0.008218175731599331, "rewards/margins": 0.044897980988025665, "rewards/real": 0.03667980432510376, "step": 350 }, { "epoch": 0.23, "learning_rate": 3.8379530916844347e-07, "logits/generated": 6.510591983795166, "logits/real": 4.735245227813721, "logps/generated": -277.96197509765625, "logps/real": -237.43386840820312, "loss": 0.6671, "rewards/accuracies": 0.6499999761581421, "rewards/generated": -0.022423155605793, "rewards/margins": 0.0445123165845871, "rewards/real": 0.022089168429374695, "step": 360 }, { "epoch": 0.24, "learning_rate": 3.9445628997867805e-07, "logits/generated": 6.379269599914551, "logits/real": 5.035122871398926, "logps/generated": -270.5735778808594, "logps/real": -274.4202880859375, "loss": 0.6704, "rewards/accuracies": 0.637499988079071, "rewards/generated": -0.0076493457891047, "rewards/margins": 0.043878089636564255, "rewards/real": 0.03622874245047569, "step": 370 }, { "epoch": 0.24, "learning_rate": 4.051172707889126e-07, "logits/generated": 6.588892459869385, "logits/real": 4.547529697418213, "logps/generated": -293.5452880859375, "logps/real": -258.05047607421875, "loss": 0.67, "rewards/accuracies": 0.574999988079071, "rewards/generated": -0.014542962424457073, "rewards/margins": 0.04654566943645477, "rewards/real": 0.032002706080675125, "step": 380 }, { "epoch": 0.25, "learning_rate": 4.157782515991471e-07, "logits/generated": 6.528749942779541, "logits/real": 4.817520618438721, "logps/generated": -278.42205810546875, "logps/real": -245.11782836914062, "loss": 0.6648, "rewards/accuracies": 0.637499988079071, "rewards/generated": -0.01723286882042885, "rewards/margins": 0.05723923444747925, "rewards/real": 0.0400063656270504, "step": 390 }, { "epoch": 0.26, "learning_rate": 4.2643923240938163e-07, "logits/generated": 6.4185590744018555, "logits/real": 4.532934665679932, "logps/generated": -268.5301208496094, "logps/real": -250.79397583007812, "loss": 0.6603, "rewards/accuracies": 0.7124999761581421, "rewards/generated": -0.02642243169248104, "rewards/margins": 0.08120186626911163, "rewards/real": 0.05477944016456604, "step": 400 }, { "epoch": 0.26, "learning_rate": 4.371002132196162e-07, "logits/generated": 6.537232875823975, "logits/real": 4.77290678024292, "logps/generated": -285.8045959472656, "logps/real": -244.49655151367188, "loss": 0.6525, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -0.012522486969828606, "rewards/margins": 0.08892401307821274, "rewards/real": 0.07640153169631958, "step": 410 }, { "epoch": 0.27, "learning_rate": 4.4776119402985074e-07, "logits/generated": 6.560798645019531, "logits/real": 4.509766578674316, "logps/generated": -264.9848327636719, "logps/real": -250.44418334960938, "loss": 0.6491, "rewards/accuracies": 0.6625000238418579, "rewards/generated": -0.00984070636332035, "rewards/margins": 0.07440716028213501, "rewards/real": 0.06456644833087921, "step": 420 }, { "epoch": 0.28, "learning_rate": 4.5842217484008526e-07, "logits/generated": 6.5955400466918945, "logits/real": 4.605684757232666, "logps/generated": -271.46160888671875, "logps/real": -233.9333953857422, "loss": 0.6469, "rewards/accuracies": 0.75, "rewards/generated": -0.016519565135240555, "rewards/margins": 0.10620995610952377, "rewards/real": 0.08969040215015411, "step": 430 }, { "epoch": 0.28, "learning_rate": 4.690831556503198e-07, "logits/generated": 6.498379707336426, "logits/real": 4.555215358734131, "logps/generated": -275.9375, "logps/real": -245.8618621826172, "loss": 0.6381, "rewards/accuracies": 0.7250000238418579, "rewards/generated": -0.019702184945344925, "rewards/margins": 0.10273710638284683, "rewards/real": 0.08303491771221161, "step": 440 }, { "epoch": 0.29, "learning_rate": 4.797441364605543e-07, "logits/generated": 6.488035678863525, "logits/real": 4.758559226989746, "logps/generated": -292.8480224609375, "logps/real": -235.81704711914062, "loss": 0.6266, "rewards/accuracies": 0.862500011920929, "rewards/generated": -0.0321931466460228, "rewards/margins": 0.13845106959342957, "rewards/real": 0.10625793039798737, "step": 450 }, { "epoch": 0.29, "learning_rate": 4.904051172707888e-07, "logits/generated": 6.370500564575195, "logits/real": 5.3891401290893555, "logps/generated": -288.18426513671875, "logps/real": -262.585693359375, "loss": 0.615, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -0.047702450305223465, "rewards/margins": 0.16840405762195587, "rewards/real": 0.12070159614086151, "step": 460 }, { "epoch": 0.3, "learning_rate": 4.998815165876776e-07, "logits/generated": 6.4978346824646, "logits/real": 4.823352813720703, "logps/generated": -282.669921875, "logps/real": -243.49526977539062, "loss": 0.616, "rewards/accuracies": 0.862500011920929, "rewards/generated": -0.03011218085885048, "rewards/margins": 0.1787882149219513, "rewards/real": 0.14867602288722992, "step": 470 }, { "epoch": 0.31, "learning_rate": 4.98696682464455e-07, "logits/generated": 6.336424350738525, "logits/real": 4.733750343322754, "logps/generated": -267.9712829589844, "logps/real": -241.704345703125, "loss": 0.6071, "rewards/accuracies": 0.8374999761581421, "rewards/generated": -0.01858561486005783, "rewards/margins": 0.17029884457588196, "rewards/real": 0.15171322226524353, "step": 480 }, { "epoch": 0.31, "learning_rate": 4.975118483412322e-07, "logits/generated": 6.465763092041016, "logits/real": 4.750241279602051, "logps/generated": -276.46527099609375, "logps/real": -227.153564453125, "loss": 0.5994, "rewards/accuracies": 0.8125, "rewards/generated": -0.020980175584554672, "rewards/margins": 0.18550223112106323, "rewards/real": 0.16452205181121826, "step": 490 }, { "epoch": 0.32, "learning_rate": 4.963270142180094e-07, "logits/generated": 6.40543270111084, "logits/real": 4.948707103729248, "logps/generated": -280.562255859375, "logps/real": -245.1046142578125, "loss": 0.5866, "rewards/accuracies": 0.887499988079071, "rewards/generated": -0.08627736568450928, "rewards/margins": 0.2752222418785095, "rewards/real": 0.18894490599632263, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.951421800947867e-07, "logits/generated": 6.454255104064941, "logits/real": 4.428702354431152, "logps/generated": -277.8967590332031, "logps/real": -239.1210479736328, "loss": 0.5713, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.04224959388375282, "rewards/margins": 0.2869378924369812, "rewards/real": 0.24468836188316345, "step": 510 }, { "epoch": 0.33, "learning_rate": 4.93957345971564e-07, "logits/generated": 6.436443328857422, "logits/real": 4.778555393218994, "logps/generated": -269.21307373046875, "logps/real": -231.3462677001953, "loss": 0.5635, "rewards/accuracies": 0.887499988079071, "rewards/generated": -0.027034465223550797, "rewards/margins": 0.30413612723350525, "rewards/real": 0.27710166573524475, "step": 520 }, { "epoch": 0.34, "learning_rate": 4.927725118483413e-07, "logits/generated": 6.379159927368164, "logits/real": 4.740503787994385, "logps/generated": -271.0333557128906, "logps/real": -231.5590057373047, "loss": 0.5598, "rewards/accuracies": 0.9375, "rewards/generated": -0.03343794867396355, "rewards/margins": 0.2944517731666565, "rewards/real": 0.26101383566856384, "step": 530 }, { "epoch": 0.35, "learning_rate": 4.915876777251184e-07, "logits/generated": 6.4404754638671875, "logits/real": 4.698214054107666, "logps/generated": -269.7973937988281, "logps/real": -245.9893798828125, "loss": 0.5512, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.03939288109540939, "rewards/margins": 0.3222863972187042, "rewards/real": 0.28289347887039185, "step": 540 }, { "epoch": 0.35, "learning_rate": 4.904028436018957e-07, "logits/generated": 6.421341896057129, "logits/real": 4.729471683502197, "logps/generated": -283.0313720703125, "logps/real": -245.7028350830078, "loss": 0.5334, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.06327588111162186, "rewards/margins": 0.36130860447883606, "rewards/real": 0.2980327010154724, "step": 550 }, { "epoch": 0.36, "learning_rate": 4.892180094786729e-07, "logits/generated": 6.3695831298828125, "logits/real": 4.896943092346191, "logps/generated": -275.6328125, "logps/real": -234.8939666748047, "loss": 0.5383, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.043337948620319366, "rewards/margins": 0.3582867383956909, "rewards/real": 0.31494876742362976, "step": 560 }, { "epoch": 0.36, "learning_rate": 4.880331753554502e-07, "logits/generated": 6.409314155578613, "logits/real": 4.403968811035156, "logps/generated": -288.6573181152344, "logps/real": -252.0238800048828, "loss": 0.5142, "rewards/accuracies": 0.9375, "rewards/generated": -0.09962789714336395, "rewards/margins": 0.4413130283355713, "rewards/real": 0.34168511629104614, "step": 570 }, { "epoch": 0.37, "learning_rate": 4.868483412322275e-07, "logits/generated": 6.514178276062012, "logits/real": 4.654687404632568, "logps/generated": -276.91796875, "logps/real": -247.6042938232422, "loss": 0.5149, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -0.07225228101015091, "rewards/margins": 0.36416369676589966, "rewards/real": 0.29191142320632935, "step": 580 }, { "epoch": 0.38, "learning_rate": 4.856635071090047e-07, "logits/generated": 6.484377861022949, "logits/real": 4.610289096832275, "logps/generated": -287.1705322265625, "logps/real": -248.52066040039062, "loss": 0.4863, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.12423114478588104, "rewards/margins": 0.5095083713531494, "rewards/real": 0.38527724146842957, "step": 590 }, { "epoch": 0.38, "learning_rate": 4.84478672985782e-07, "logits/generated": 6.429436683654785, "logits/real": 4.436240196228027, "logps/generated": -285.28814697265625, "logps/real": -233.54312133789062, "loss": 0.4702, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.11845574527978897, "rewards/margins": 0.5989123582839966, "rewards/real": 0.4804566502571106, "step": 600 }, { "epoch": 0.39, "learning_rate": 4.832938388625591e-07, "logits/generated": 6.502951145172119, "logits/real": 4.6671462059021, "logps/generated": -281.2021179199219, "logps/real": -235.6467742919922, "loss": 0.4764, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.07756136357784271, "rewards/margins": 0.5416086316108704, "rewards/real": 0.46404728293418884, "step": 610 }, { "epoch": 0.4, "learning_rate": 4.821090047393365e-07, "logits/generated": 6.4275922775268555, "logits/real": 4.628044128417969, "logps/generated": -287.75958251953125, "logps/real": -230.74954223632812, "loss": 0.4542, "rewards/accuracies": 0.9375, "rewards/generated": -0.11417678743600845, "rewards/margins": 0.655004620552063, "rewards/real": 0.5408278703689575, "step": 620 }, { "epoch": 0.4, "learning_rate": 4.809241706161137e-07, "logits/generated": 6.4412336349487305, "logits/real": 5.0383100509643555, "logps/generated": -262.37652587890625, "logps/real": -250.28353881835938, "loss": 0.4457, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.051359303295612335, "rewards/margins": 0.6307464838027954, "rewards/real": 0.5793871879577637, "step": 630 }, { "epoch": 0.41, "learning_rate": 4.79739336492891e-07, "logits/generated": 6.4190993309021, "logits/real": 4.736918926239014, "logps/generated": -292.39813232421875, "logps/real": -238.57089233398438, "loss": 0.4305, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.06552235782146454, "rewards/margins": 0.6890299916267395, "rewards/real": 0.623507559299469, "step": 640 }, { "epoch": 0.42, "learning_rate": 4.785545023696682e-07, "logits/generated": 6.509694576263428, "logits/real": 4.679415702819824, "logps/generated": -283.118896484375, "logps/real": -251.4104766845703, "loss": 0.414, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.12958040833473206, "rewards/margins": 0.7429105043411255, "rewards/real": 0.613330066204071, "step": 650 }, { "epoch": 0.42, "learning_rate": 4.773696682464455e-07, "logits/generated": 6.4275031089782715, "logits/real": 4.612570285797119, "logps/generated": -283.679931640625, "logps/real": -234.7230987548828, "loss": 0.4117, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.12087702751159668, "rewards/margins": 0.8004452586174011, "rewards/real": 0.679568350315094, "step": 660 }, { "epoch": 0.43, "learning_rate": 4.7618483412322273e-07, "logits/generated": 6.529521942138672, "logits/real": 4.53403377532959, "logps/generated": -276.1392822265625, "logps/real": -226.3787078857422, "loss": 0.4186, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.10057850182056427, "rewards/margins": 0.805554211139679, "rewards/real": 0.7049756646156311, "step": 670 }, { "epoch": 0.44, "learning_rate": 4.7499999999999995e-07, "logits/generated": 6.591378688812256, "logits/real": 4.763890743255615, "logps/generated": -295.3109436035156, "logps/real": -235.9544677734375, "loss": 0.3916, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.20255105197429657, "rewards/margins": 0.8574334979057312, "rewards/real": 0.654882550239563, "step": 680 }, { "epoch": 0.44, "learning_rate": 4.738151658767772e-07, "logits/generated": 6.50357723236084, "logits/real": 4.819157123565674, "logps/generated": -287.8582763671875, "logps/real": -247.63803100585938, "loss": 0.381, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.17564311623573303, "rewards/margins": 0.9119162559509277, "rewards/real": 0.7362731695175171, "step": 690 }, { "epoch": 0.45, "learning_rate": 4.726303317535545e-07, "logits/generated": 6.48019552230835, "logits/real": 4.711845397949219, "logps/generated": -269.28668212890625, "logps/real": -223.31192016601562, "loss": 0.385, "rewards/accuracies": 0.9375, "rewards/generated": -0.08161990344524384, "rewards/margins": 0.8838006258010864, "rewards/real": 0.8021806478500366, "step": 700 }, { "epoch": 0.45, "learning_rate": 4.7144549763033177e-07, "logits/generated": 6.481300354003906, "logits/real": 4.540422439575195, "logps/generated": -285.6842346191406, "logps/real": -235.37417602539062, "loss": 0.3653, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.19102832674980164, "rewards/margins": 0.9631514549255371, "rewards/real": 0.7721230983734131, "step": 710 }, { "epoch": 0.46, "learning_rate": 4.70260663507109e-07, "logits/generated": 6.4215497970581055, "logits/real": 4.966759204864502, "logps/generated": -279.190185546875, "logps/real": -242.8605194091797, "loss": 0.352, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.12265386432409286, "rewards/margins": 0.973602294921875, "rewards/real": 0.8509486317634583, "step": 720 }, { "epoch": 0.47, "learning_rate": 4.690758293838862e-07, "logits/generated": 6.504040718078613, "logits/real": 5.0368146896362305, "logps/generated": -269.21197509765625, "logps/real": -249.8148956298828, "loss": 0.3605, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.09707117080688477, "rewards/margins": 0.9983582496643066, "rewards/real": 0.9012872576713562, "step": 730 }, { "epoch": 0.47, "learning_rate": 4.678909952606635e-07, "logits/generated": 6.4590349197387695, "logits/real": 4.72456693649292, "logps/generated": -287.1020202636719, "logps/real": -243.59365844726562, "loss": 0.3471, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.19013145565986633, "rewards/margins": 1.1382641792297363, "rewards/real": 0.9481328129768372, "step": 740 }, { "epoch": 0.48, "learning_rate": 4.667061611374407e-07, "logits/generated": 6.586329460144043, "logits/real": 4.696109771728516, "logps/generated": -294.53399658203125, "logps/real": -256.1171569824219, "loss": 0.3363, "rewards/accuracies": 0.9375, "rewards/generated": -0.23052604496479034, "rewards/margins": 1.134413242340088, "rewards/real": 0.9038872718811035, "step": 750 }, { "epoch": 0.49, "learning_rate": 4.65521327014218e-07, "logits/generated": 6.411507606506348, "logits/real": 5.131626129150391, "logps/generated": -288.7245178222656, "logps/real": -269.36541748046875, "loss": 0.3392, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -0.23499789834022522, "rewards/margins": 1.1348694562911987, "rewards/real": 0.8998715281486511, "step": 760 }, { "epoch": 0.49, "learning_rate": 4.6433649289099525e-07, "logits/generated": 6.4309258460998535, "logits/real": 4.649967193603516, "logps/generated": -277.1844787597656, "logps/real": -242.71011352539062, "loss": 0.3126, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.2691335380077362, "rewards/margins": 1.2196036577224731, "rewards/real": 0.9504700899124146, "step": 770 }, { "epoch": 0.5, "learning_rate": 4.631516587677725e-07, "logits/generated": 6.520060062408447, "logits/real": 4.70443058013916, "logps/generated": -276.5824890136719, "logps/real": -213.98782348632812, "loss": 0.3163, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -0.2217775583267212, "rewards/margins": 1.2883174419403076, "rewards/real": 1.0665398836135864, "step": 780 }, { "epoch": 0.51, "learning_rate": 4.6196682464454974e-07, "logits/generated": 6.340624809265137, "logits/real": 4.910890102386475, "logps/generated": -276.492431640625, "logps/real": -250.92135620117188, "loss": 0.3078, "rewards/accuracies": 0.9375, "rewards/generated": -0.26993709802627563, "rewards/margins": 1.3064370155334473, "rewards/real": 1.0364999771118164, "step": 790 }, { "epoch": 0.51, "learning_rate": 4.60781990521327e-07, "logits/generated": 6.535134315490723, "logits/real": 4.642252445220947, "logps/generated": -282.04254150390625, "logps/real": -224.3877716064453, "loss": 0.2998, "rewards/accuracies": 0.9375, "rewards/generated": -0.2987816333770752, "rewards/margins": 1.3563276529312134, "rewards/real": 1.057545781135559, "step": 800 }, { "epoch": 0.52, "learning_rate": 4.5959715639810423e-07, "logits/generated": 6.41034460067749, "logits/real": 4.7313551902771, "logps/generated": -269.9966735839844, "logps/real": -222.67361450195312, "loss": 0.3116, "rewards/accuracies": 0.887499988079071, "rewards/generated": -0.1768857091665268, "rewards/margins": 1.3481709957122803, "rewards/real": 1.1712852716445923, "step": 810 }, { "epoch": 0.52, "learning_rate": 4.5841232227488145e-07, "logits/generated": 6.5445876121521, "logits/real": 4.706090450286865, "logps/generated": -292.2023010253906, "logps/real": -230.2037353515625, "loss": 0.3059, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.3371911942958832, "rewards/margins": 1.470198392868042, "rewards/real": 1.1330074071884155, "step": 820 }, { "epoch": 0.53, "learning_rate": 4.5722748815165873e-07, "logits/generated": 6.492764949798584, "logits/real": 4.727120876312256, "logps/generated": -286.11614990234375, "logps/real": -236.4132080078125, "loss": 0.2588, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.34763607382774353, "rewards/margins": 1.618819236755371, "rewards/real": 1.2711832523345947, "step": 830 }, { "epoch": 0.54, "learning_rate": 4.56042654028436e-07, "logits/generated": 6.487623691558838, "logits/real": 4.916855335235596, "logps/generated": -301.30670166015625, "logps/real": -261.5294494628906, "loss": 0.2905, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -0.26863616704940796, "rewards/margins": 1.2958793640136719, "rewards/real": 1.0272432565689087, "step": 840 }, { "epoch": 0.54, "learning_rate": 4.5485781990521327e-07, "logits/generated": 6.653237342834473, "logits/real": 4.623661518096924, "logps/generated": -285.1461181640625, "logps/real": -243.27243041992188, "loss": 0.3001, "rewards/accuracies": 0.9375, "rewards/generated": -0.20099958777427673, "rewards/margins": 1.3751389980316162, "rewards/real": 1.174139380455017, "step": 850 }, { "epoch": 0.55, "learning_rate": 4.536729857819905e-07, "logits/generated": 6.405764579772949, "logits/real": 4.904845714569092, "logps/generated": -265.61737060546875, "logps/real": -236.12594604492188, "loss": 0.2656, "rewards/accuracies": 0.9375, "rewards/generated": -0.2864135205745697, "rewards/margins": 1.4618184566497803, "rewards/real": 1.1754049062728882, "step": 860 }, { "epoch": 0.56, "learning_rate": 4.5248815165876776e-07, "logits/generated": 6.408907890319824, "logits/real": 4.733492374420166, "logps/generated": -282.3438415527344, "logps/real": -248.62728881835938, "loss": 0.2501, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.32550299167633057, "rewards/margins": 1.5754355192184448, "rewards/real": 1.2499325275421143, "step": 870 }, { "epoch": 0.56, "learning_rate": 4.5130331753554504e-07, "logits/generated": 6.523440361022949, "logits/real": 4.893500804901123, "logps/generated": -274.6362609863281, "logps/real": -239.6786346435547, "loss": 0.2517, "rewards/accuracies": 0.9375, "rewards/generated": -0.37395572662353516, "rewards/margins": 1.6604623794555664, "rewards/real": 1.2865066528320312, "step": 880 }, { "epoch": 0.57, "learning_rate": 4.5011848341232226e-07, "logits/generated": 6.497984886169434, "logits/real": 4.835686683654785, "logps/generated": -284.5157165527344, "logps/real": -240.1763458251953, "loss": 0.2639, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.3406885862350464, "rewards/margins": 1.6514053344726562, "rewards/real": 1.3107168674468994, "step": 890 }, { "epoch": 0.58, "learning_rate": 4.489336492890995e-07, "logits/generated": 6.601628303527832, "logits/real": 4.59323787689209, "logps/generated": -289.57421875, "logps/real": -227.0326690673828, "loss": 0.2481, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.4485829770565033, "rewards/margins": 1.7304086685180664, "rewards/real": 1.2818256616592407, "step": 900 }, { "epoch": 0.58, "learning_rate": 4.4774881516587675e-07, "logits/generated": 6.438521385192871, "logits/real": 5.212011814117432, "logps/generated": -274.275146484375, "logps/real": -256.7066345214844, "loss": 0.2254, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.3772863745689392, "rewards/margins": 1.8105132579803467, "rewards/real": 1.4332268238067627, "step": 910 }, { "epoch": 0.59, "learning_rate": 4.46563981042654e-07, "logits/generated": 6.509451389312744, "logits/real": 4.610627174377441, "logps/generated": -291.47552490234375, "logps/real": -251.83706665039062, "loss": 0.2307, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.4892211854457855, "rewards/margins": 1.7918579578399658, "rewards/real": 1.3026366233825684, "step": 920 }, { "epoch": 0.6, "learning_rate": 4.4537914691943124e-07, "logits/generated": 6.490546226501465, "logits/real": 4.814209938049316, "logps/generated": -273.9214782714844, "logps/real": -221.7511444091797, "loss": 0.2508, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.4122149348258972, "rewards/margins": 1.7060003280639648, "rewards/real": 1.2937853336334229, "step": 930 }, { "epoch": 0.6, "learning_rate": 4.441943127962085e-07, "logits/generated": 6.425353050231934, "logits/real": 5.0255045890808105, "logps/generated": -280.7168884277344, "logps/real": -247.033935546875, "loss": 0.2304, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.47266262769699097, "rewards/margins": 1.8882691860198975, "rewards/real": 1.4156067371368408, "step": 940 }, { "epoch": 0.61, "learning_rate": 4.430094786729858e-07, "logits/generated": 6.480774879455566, "logits/real": 4.786948204040527, "logps/generated": -291.06512451171875, "logps/real": -209.83615112304688, "loss": 0.2214, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.4364975094795227, "rewards/margins": 1.8951022624969482, "rewards/real": 1.4586045742034912, "step": 950 }, { "epoch": 0.61, "learning_rate": 4.4182464454976306e-07, "logits/generated": 6.375167369842529, "logits/real": 5.035046577453613, "logps/generated": -276.30718994140625, "logps/real": -235.35733032226562, "loss": 0.2082, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.40835681557655334, "rewards/margins": 2.0441970825195312, "rewards/real": 1.6358401775360107, "step": 960 }, { "epoch": 0.62, "learning_rate": 4.4063981042654023e-07, "logits/generated": 6.5355730056762695, "logits/real": 4.934959888458252, "logps/generated": -287.76385498046875, "logps/real": -228.20425415039062, "loss": 0.1887, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.589410126209259, "rewards/margins": 2.164210557937622, "rewards/real": 1.5748002529144287, "step": 970 }, { "epoch": 0.63, "learning_rate": 4.394549763033175e-07, "logits/generated": 6.452719211578369, "logits/real": 4.160326957702637, "logps/generated": -283.5166931152344, "logps/real": -217.1543426513672, "loss": 0.2238, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.24201051890850067, "rewards/margins": 2.0650689601898193, "rewards/real": 1.8230584859848022, "step": 980 }, { "epoch": 0.63, "learning_rate": 4.382701421800948e-07, "logits/generated": 6.507842063903809, "logits/real": 5.069902420043945, "logps/generated": -269.7446594238281, "logps/real": -248.04714965820312, "loss": 0.1762, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.44552716612815857, "rewards/margins": 2.1091365814208984, "rewards/real": 1.6636091470718384, "step": 990 }, { "epoch": 0.64, "learning_rate": 4.37085308056872e-07, "logits/generated": 6.380208492279053, "logits/real": 4.518065452575684, "logps/generated": -274.2218322753906, "logps/real": -219.1566925048828, "loss": 0.2048, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.46542254090309143, "rewards/margins": 2.0480804443359375, "rewards/real": 1.5826579332351685, "step": 1000 }, { "epoch": 0.65, "learning_rate": 4.3590047393364927e-07, "logits/generated": 6.60736608505249, "logits/real": 4.692660331726074, "logps/generated": -273.3490295410156, "logps/real": -219.55429077148438, "loss": 0.2376, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.2550446093082428, "rewards/margins": 1.8392921686172485, "rewards/real": 1.5842477083206177, "step": 1010 }, { "epoch": 0.65, "learning_rate": 4.3471563981042654e-07, "logits/generated": 6.539947509765625, "logits/real": 4.22275447845459, "logps/generated": -281.519775390625, "logps/real": -199.8513641357422, "loss": 0.1947, "rewards/accuracies": 1.0, "rewards/generated": -0.596612811088562, "rewards/margins": 2.411491870880127, "rewards/real": 1.8148788213729858, "step": 1020 }, { "epoch": 0.66, "learning_rate": 4.335308056872038e-07, "logits/generated": 6.498897552490234, "logits/real": 4.682340621948242, "logps/generated": -276.3707580566406, "logps/real": -240.2972412109375, "loss": 0.2016, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.45306864380836487, "rewards/margins": 1.9954931735992432, "rewards/real": 1.5424243211746216, "step": 1030 }, { "epoch": 0.67, "learning_rate": 4.32345971563981e-07, "logits/generated": 6.35316276550293, "logits/real": 4.76320219039917, "logps/generated": -291.69427490234375, "logps/real": -244.2530517578125, "loss": 0.1921, "rewards/accuracies": 0.9375, "rewards/generated": -0.6429895162582397, "rewards/margins": 2.3938496112823486, "rewards/real": 1.7508599758148193, "step": 1040 }, { "epoch": 0.67, "learning_rate": 4.3116113744075825e-07, "logits/generated": 6.538656711578369, "logits/real": 4.506134986877441, "logps/generated": -297.8096923828125, "logps/real": -223.4852294921875, "loss": 0.1945, "rewards/accuracies": 0.9375, "rewards/generated": -0.7660019397735596, "rewards/margins": 2.628037214279175, "rewards/real": 1.8620353937149048, "step": 1050 }, { "epoch": 0.68, "learning_rate": 4.299763033175355e-07, "logits/generated": 6.559047698974609, "logits/real": 4.753483772277832, "logps/generated": -283.09259033203125, "logps/real": -231.58828735351562, "loss": 0.1659, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.5766782164573669, "rewards/margins": 2.1829142570495605, "rewards/real": 1.6062358617782593, "step": 1060 }, { "epoch": 0.68, "learning_rate": 4.2879146919431274e-07, "logits/generated": 6.543219089508057, "logits/real": 4.634883880615234, "logps/generated": -292.83892822265625, "logps/real": -237.899658203125, "loss": 0.1848, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.7167092561721802, "rewards/margins": 2.470365524291992, "rewards/real": 1.7536563873291016, "step": 1070 }, { "epoch": 0.69, "learning_rate": 4.2760663507109e-07, "logits/generated": 6.605074405670166, "logits/real": 4.641299724578857, "logps/generated": -273.18243408203125, "logps/real": -222.3721923828125, "loss": 0.1918, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.5909595489501953, "rewards/margins": 2.6222665309906006, "rewards/real": 2.031306743621826, "step": 1080 }, { "epoch": 0.7, "learning_rate": 4.264218009478673e-07, "logits/generated": 6.473427772521973, "logits/real": 4.688056945800781, "logps/generated": -294.8439636230469, "logps/real": -218.5131378173828, "loss": 0.184, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.6649213433265686, "rewards/margins": 2.604104518890381, "rewards/real": 1.9391835927963257, "step": 1090 }, { "epoch": 0.7, "learning_rate": 4.2523696682464456e-07, "logits/generated": 6.37612247467041, "logits/real": 4.925015449523926, "logps/generated": -288.498779296875, "logps/real": -232.14883422851562, "loss": 0.1871, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.5896228551864624, "rewards/margins": 2.15082049369812, "rewards/real": 1.5611976385116577, "step": 1100 }, { "epoch": 0.71, "learning_rate": 4.240521327014218e-07, "logits/generated": 6.474294185638428, "logits/real": 4.607337474822998, "logps/generated": -271.22357177734375, "logps/real": -217.6533660888672, "loss": 0.1953, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.616405189037323, "rewards/margins": 2.434063196182251, "rewards/real": 1.8176580667495728, "step": 1110 }, { "epoch": 0.72, "learning_rate": 4.22867298578199e-07, "logits/generated": 6.460890769958496, "logits/real": 4.362028121948242, "logps/generated": -294.4446105957031, "logps/real": -233.0, "loss": 0.1522, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.9041115641593933, "rewards/margins": 2.483468770980835, "rewards/real": 1.5793571472167969, "step": 1120 }, { "epoch": 0.72, "learning_rate": 4.216824644549763e-07, "logits/generated": 6.5067267417907715, "logits/real": 4.801357269287109, "logps/generated": -283.41253662109375, "logps/real": -229.07308959960938, "loss": 0.1674, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.8184356689453125, "rewards/margins": 2.9656457901000977, "rewards/real": 2.147209644317627, "step": 1130 }, { "epoch": 0.73, "learning_rate": 4.2049763033175355e-07, "logits/generated": 6.525651454925537, "logits/real": 4.5972700119018555, "logps/generated": -273.68255615234375, "logps/real": -226.0257568359375, "loss": 0.1825, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.7906166315078735, "rewards/margins": 2.4105656147003174, "rewards/real": 1.6199489831924438, "step": 1140 }, { "epoch": 0.74, "learning_rate": 4.1931279620853077e-07, "logits/generated": 6.486138820648193, "logits/real": 4.842376232147217, "logps/generated": -297.69329833984375, "logps/real": -236.61843872070312, "loss": 0.1377, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.8111250996589661, "rewards/margins": 2.760737657546997, "rewards/real": 1.9496123790740967, "step": 1150 }, { "epoch": 0.74, "learning_rate": 4.1812796208530804e-07, "logits/generated": 6.288398265838623, "logits/real": 5.120048522949219, "logps/generated": -302.489990234375, "logps/real": -220.8452606201172, "loss": 0.1719, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.9194461107254028, "rewards/margins": 2.725492000579834, "rewards/real": 1.8060458898544312, "step": 1160 }, { "epoch": 0.75, "learning_rate": 4.169431279620853e-07, "logits/generated": 6.422547817230225, "logits/real": 5.059387683868408, "logps/generated": -285.9918518066406, "logps/real": -246.36697387695312, "loss": 0.1815, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.8091152310371399, "rewards/margins": 2.879631280899048, "rewards/real": 2.0705161094665527, "step": 1170 }, { "epoch": 0.75, "learning_rate": 4.1575829383886253e-07, "logits/generated": 6.4782843589782715, "logits/real": 4.969704627990723, "logps/generated": -287.41973876953125, "logps/real": -207.5157928466797, "loss": 0.191, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.8577953577041626, "rewards/margins": 2.5767669677734375, "rewards/real": 1.718971848487854, "step": 1180 }, { "epoch": 0.76, "learning_rate": 4.145734597156398e-07, "logits/generated": 6.450139045715332, "logits/real": 4.915671348571777, "logps/generated": -270.8932189941406, "logps/real": -217.2313995361328, "loss": 0.172, "rewards/accuracies": 0.925000011920929, "rewards/generated": -0.7773370146751404, "rewards/margins": 2.6479990482330322, "rewards/real": 1.870661973953247, "step": 1190 }, { "epoch": 0.77, "learning_rate": 4.1338862559241703e-07, "logits/generated": 6.705965518951416, "logits/real": 4.81022834777832, "logps/generated": -294.56597900390625, "logps/real": -211.1824493408203, "loss": 0.16, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.0764670372009277, "rewards/margins": 3.1245856285095215, "rewards/real": 2.0481185913085938, "step": 1200 }, { "epoch": 0.77, "learning_rate": 4.122037914691943e-07, "logits/generated": 6.361940860748291, "logits/real": 4.980579376220703, "logps/generated": -287.1803283691406, "logps/real": -244.1328125, "loss": 0.1562, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.976216197013855, "rewards/margins": 2.9318318367004395, "rewards/real": 1.9556156396865845, "step": 1210 }, { "epoch": 0.78, "learning_rate": 4.110189573459715e-07, "logits/generated": 6.496147155761719, "logits/real": 4.533209800720215, "logps/generated": -292.2799377441406, "logps/real": -220.11685180664062, "loss": 0.145, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -0.9167261123657227, "rewards/margins": 3.0090811252593994, "rewards/real": 2.0923550128936768, "step": 1220 }, { "epoch": 0.79, "learning_rate": 4.098341232227488e-07, "logits/generated": 6.39020299911499, "logits/real": 4.5528459548950195, "logps/generated": -286.52435302734375, "logps/real": -214.3458251953125, "loss": 0.1624, "rewards/accuracies": 0.987500011920929, "rewards/generated": -0.9529097676277161, "rewards/margins": 2.9751744270324707, "rewards/real": 2.0222644805908203, "step": 1230 }, { "epoch": 0.79, "learning_rate": 4.0864928909952607e-07, "logits/generated": 6.4211554527282715, "logits/real": 4.6509599685668945, "logps/generated": -282.4293518066406, "logps/real": -223.8668975830078, "loss": 0.167, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.7745916247367859, "rewards/margins": 2.5993974208831787, "rewards/real": 1.8248056173324585, "step": 1240 }, { "epoch": 0.8, "learning_rate": 4.074644549763033e-07, "logits/generated": 6.483539581298828, "logits/real": 4.578310489654541, "logps/generated": -286.9674987792969, "logps/real": -238.4805145263672, "loss": 0.1563, "rewards/accuracies": 0.9375, "rewards/generated": -0.8147264719009399, "rewards/margins": 2.793470859527588, "rewards/real": 1.9787445068359375, "step": 1250 }, { "epoch": 0.81, "learning_rate": 4.0627962085308056e-07, "logits/generated": 6.479307651519775, "logits/real": 4.371944427490234, "logps/generated": -293.8260803222656, "logps/real": -221.93624877929688, "loss": 0.1552, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.0739367008209229, "rewards/margins": 2.962772846221924, "rewards/real": 1.888836145401001, "step": 1260 }, { "epoch": 0.81, "learning_rate": 4.0509478672985783e-07, "logits/generated": 6.399652004241943, "logits/real": 5.044413089752197, "logps/generated": -293.4299621582031, "logps/real": -239.0977325439453, "loss": 0.1582, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.0476984977722168, "rewards/margins": 3.119697332382202, "rewards/real": 2.0719990730285645, "step": 1270 }, { "epoch": 0.82, "learning_rate": 4.0390995260663505e-07, "logits/generated": 6.399343967437744, "logits/real": 5.123462677001953, "logps/generated": -286.2010498046875, "logps/real": -238.5330352783203, "loss": 0.1431, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.8854296803474426, "rewards/margins": 2.840718984603882, "rewards/real": 1.9552892446517944, "step": 1280 }, { "epoch": 0.83, "learning_rate": 4.0272511848341227e-07, "logits/generated": 6.5136399269104, "logits/real": 5.123744487762451, "logps/generated": -277.1810607910156, "logps/real": -224.10684204101562, "loss": 0.1746, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.9058412313461304, "rewards/margins": 2.760077953338623, "rewards/real": 1.8542366027832031, "step": 1290 }, { "epoch": 0.83, "learning_rate": 4.0154028436018954e-07, "logits/generated": 6.532641410827637, "logits/real": 4.748272895812988, "logps/generated": -294.2531433105469, "logps/real": -227.92483520507812, "loss": 0.1391, "rewards/accuracies": 1.0, "rewards/generated": -1.3061493635177612, "rewards/margins": 3.458925724029541, "rewards/real": 2.152775764465332, "step": 1300 }, { "epoch": 0.84, "learning_rate": 4.003554502369668e-07, "logits/generated": 6.615334987640381, "logits/real": 4.404749393463135, "logps/generated": -301.03900146484375, "logps/real": -221.3147430419922, "loss": 0.1335, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.0821572542190552, "rewards/margins": 3.1977028846740723, "rewards/real": 2.1155455112457275, "step": 1310 }, { "epoch": 0.84, "learning_rate": 3.991706161137441e-07, "logits/generated": 6.418117523193359, "logits/real": 4.882086753845215, "logps/generated": -294.2748718261719, "logps/real": -232.966552734375, "loss": 0.133, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.2364888191223145, "rewards/margins": 3.057285785675049, "rewards/real": 1.8207969665527344, "step": 1320 }, { "epoch": 0.85, "learning_rate": 3.979857819905213e-07, "logits/generated": 6.474581241607666, "logits/real": 4.630919933319092, "logps/generated": -291.40313720703125, "logps/real": -231.16006469726562, "loss": 0.0996, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.1898620128631592, "rewards/margins": 3.224040985107422, "rewards/real": 2.0341789722442627, "step": 1330 }, { "epoch": 0.86, "learning_rate": 3.968009478672986e-07, "logits/generated": 6.528960227966309, "logits/real": 4.991686820983887, "logps/generated": -280.37518310546875, "logps/real": -234.42623901367188, "loss": 0.1544, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.0884606838226318, "rewards/margins": 3.3553130626678467, "rewards/real": 2.2668521404266357, "step": 1340 }, { "epoch": 0.86, "learning_rate": 3.9561611374407585e-07, "logits/generated": 6.5070061683654785, "logits/real": 4.700498104095459, "logps/generated": -285.8040466308594, "logps/real": -211.8716278076172, "loss": 0.15, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -1.127325177192688, "rewards/margins": 3.0416786670684814, "rewards/real": 1.914353370666504, "step": 1350 }, { "epoch": 0.87, "learning_rate": 3.94431279620853e-07, "logits/generated": 6.504839897155762, "logits/real": 4.721653938293457, "logps/generated": -281.8013610839844, "logps/real": -217.22647094726562, "loss": 0.1237, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.1646500825881958, "rewards/margins": 3.471494674682617, "rewards/real": 2.306844711303711, "step": 1360 }, { "epoch": 0.88, "learning_rate": 3.932464454976303e-07, "logits/generated": 6.476781368255615, "logits/real": 4.780390739440918, "logps/generated": -298.7755432128906, "logps/real": -242.40310668945312, "loss": 0.147, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.1053074598312378, "rewards/margins": 3.288050889968872, "rewards/real": 2.1827430725097656, "step": 1370 }, { "epoch": 0.88, "learning_rate": 3.9206161137440757e-07, "logits/generated": 6.5268354415893555, "logits/real": 4.777252197265625, "logps/generated": -302.7318115234375, "logps/real": -233.9207305908203, "loss": 0.1532, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.3840343952178955, "rewards/margins": 3.5235512256622314, "rewards/real": 2.139516592025757, "step": 1380 }, { "epoch": 0.89, "learning_rate": 3.9087677725118484e-07, "logits/generated": 6.526573181152344, "logits/real": 4.747769355773926, "logps/generated": -291.31085205078125, "logps/real": -239.3638916015625, "loss": 0.1283, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.26791250705719, "rewards/margins": 3.175063133239746, "rewards/real": 1.9071502685546875, "step": 1390 }, { "epoch": 0.9, "learning_rate": 3.8969194312796206e-07, "logits/generated": 6.308130264282227, "logits/real": 4.654840469360352, "logps/generated": -301.1564636230469, "logps/real": -234.5809326171875, "loss": 0.1158, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.2920730113983154, "rewards/margins": 3.129149913787842, "rewards/real": 1.8370771408081055, "step": 1400 }, { "epoch": 0.9, "learning_rate": 3.8850710900473933e-07, "logits/generated": 6.504507541656494, "logits/real": 4.65291166305542, "logps/generated": -305.6498107910156, "logps/real": -233.3607635498047, "loss": 0.1156, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.4324750900268555, "rewards/margins": 3.3325772285461426, "rewards/real": 1.9001020193099976, "step": 1410 }, { "epoch": 0.91, "learning_rate": 3.873222748815166e-07, "logits/generated": 6.564852237701416, "logits/real": 4.4015302658081055, "logps/generated": -288.9748840332031, "logps/real": -222.2322235107422, "loss": 0.1362, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.43608820438385, "rewards/margins": 3.308760404586792, "rewards/real": 1.8726723194122314, "step": 1420 }, { "epoch": 0.91, "learning_rate": 3.8613744075829377e-07, "logits/generated": 6.503379821777344, "logits/real": 4.592303276062012, "logps/generated": -289.198486328125, "logps/real": -226.54165649414062, "loss": 0.1142, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.3943302631378174, "rewards/margins": 3.6817550659179688, "rewards/real": 2.2874248027801514, "step": 1430 }, { "epoch": 0.92, "learning_rate": 3.8495260663507104e-07, "logits/generated": 6.395134925842285, "logits/real": 4.751669883728027, "logps/generated": -280.8811340332031, "logps/real": -227.56826782226562, "loss": 0.1107, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.2301466464996338, "rewards/margins": 3.3704135417938232, "rewards/real": 2.1402671337127686, "step": 1440 }, { "epoch": 0.93, "learning_rate": 3.837677725118483e-07, "logits/generated": 6.485627174377441, "logits/real": 4.733918190002441, "logps/generated": -306.0613708496094, "logps/real": -236.187744140625, "loss": 0.1028, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.2733852863311768, "rewards/margins": 3.3273322582244873, "rewards/real": 2.0539469718933105, "step": 1450 }, { "epoch": 0.93, "learning_rate": 3.825829383886256e-07, "logits/generated": 6.625826358795166, "logits/real": 4.502466201782227, "logps/generated": -302.5285339355469, "logps/real": -191.37582397460938, "loss": 0.126, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.4649912118911743, "rewards/margins": 3.6144001483917236, "rewards/real": 2.149409532546997, "step": 1460 }, { "epoch": 0.94, "learning_rate": 3.813981042654028e-07, "logits/generated": 6.492133140563965, "logits/real": 4.759311199188232, "logps/generated": -302.1866149902344, "logps/real": -230.4091796875, "loss": 0.1152, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.367823600769043, "rewards/margins": 3.351057529449463, "rewards/real": 1.9832338094711304, "step": 1470 }, { "epoch": 0.95, "learning_rate": 3.802132701421801e-07, "logits/generated": 6.473480224609375, "logits/real": 4.9179205894470215, "logps/generated": -292.11358642578125, "logps/real": -235.0968017578125, "loss": 0.1294, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.268405795097351, "rewards/margins": 3.70072603225708, "rewards/real": 2.4323201179504395, "step": 1480 }, { "epoch": 0.95, "learning_rate": 3.7902843601895736e-07, "logits/generated": 6.590761661529541, "logits/real": 4.755660057067871, "logps/generated": -283.9560852050781, "logps/real": -239.81893920898438, "loss": 0.1209, "rewards/accuracies": 0.9375, "rewards/generated": -1.3544660806655884, "rewards/margins": 3.5569427013397217, "rewards/real": 2.202476978302002, "step": 1490 }, { "epoch": 0.96, "learning_rate": 3.778436018957346e-07, "logits/generated": 6.543065071105957, "logits/real": 4.660527229309082, "logps/generated": -293.5489501953125, "logps/real": -231.8582305908203, "loss": 0.1171, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.3222100734710693, "rewards/margins": 3.1835999488830566, "rewards/real": 1.8613903522491455, "step": 1500 }, { "epoch": 0.97, "learning_rate": 3.766587677725118e-07, "logits/generated": 6.483323097229004, "logits/real": 4.6694488525390625, "logps/generated": -292.2344665527344, "logps/real": -229.16067504882812, "loss": 0.1245, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.535994052886963, "rewards/margins": 3.5554702281951904, "rewards/real": 2.0194761753082275, "step": 1510 }, { "epoch": 0.97, "learning_rate": 3.7547393364928907e-07, "logits/generated": 6.537093162536621, "logits/real": 4.834275722503662, "logps/generated": -283.90765380859375, "logps/real": -221.65771484375, "loss": 0.1043, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.336133599281311, "rewards/margins": 3.4120376110076904, "rewards/real": 2.075904369354248, "step": 1520 }, { "epoch": 0.98, "learning_rate": 3.7428909952606634e-07, "logits/generated": 6.584852695465088, "logits/real": 4.697403907775879, "logps/generated": -293.49371337890625, "logps/real": -230.45291137695312, "loss": 0.1223, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.4560847282409668, "rewards/margins": 3.7806944847106934, "rewards/real": 2.3246102333068848, "step": 1530 }, { "epoch": 0.99, "learning_rate": 3.7310426540284356e-07, "logits/generated": 6.4898881912231445, "logits/real": 4.787100791931152, "logps/generated": -289.96502685546875, "logps/real": -235.83511352539062, "loss": 0.1452, "rewards/accuracies": 0.925000011920929, "rewards/generated": -1.4755102396011353, "rewards/margins": 3.804914951324463, "rewards/real": 2.329404592514038, "step": 1540 }, { "epoch": 0.99, "learning_rate": 3.7191943127962083e-07, "logits/generated": 6.573674201965332, "logits/real": 4.813099384307861, "logps/generated": -295.05950927734375, "logps/real": -239.4409942626953, "loss": 0.1134, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.601508378982544, "rewards/margins": 3.6218199729919434, "rewards/real": 2.0203115940093994, "step": 1550 }, { "epoch": 1.0, "learning_rate": 3.707345971563981e-07, "logits/generated": 6.518675804138184, "logits/real": 4.694587707519531, "logps/generated": -288.26116943359375, "logps/real": -216.22702026367188, "loss": 0.1253, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.3888970613479614, "rewards/margins": 3.527122974395752, "rewards/real": 2.1382253170013428, "step": 1560 }, { "epoch": 1.0, "learning_rate": 3.695497630331754e-07, "logits/generated": 6.476728916168213, "logits/real": 4.867232322692871, "logps/generated": -290.8531494140625, "logps/real": -223.3974151611328, "loss": 0.1379, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.48335599899292, "rewards/margins": 3.3799889087677, "rewards/real": 1.8966329097747803, "step": 1570 }, { "epoch": 1.01, "learning_rate": 3.683649289099526e-07, "logits/generated": 6.590670585632324, "logits/real": 4.584352493286133, "logps/generated": -280.3003845214844, "logps/real": -222.9787139892578, "loss": 0.1055, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.518668532371521, "rewards/margins": 3.503495454788208, "rewards/real": 1.9848268032073975, "step": 1580 }, { "epoch": 1.02, "learning_rate": 3.671800947867298e-07, "logits/generated": 6.4740777015686035, "logits/real": 4.666982173919678, "logps/generated": -293.92950439453125, "logps/real": -241.7804718017578, "loss": 0.0979, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.4945917129516602, "rewards/margins": 3.7217907905578613, "rewards/real": 2.227198839187622, "step": 1590 }, { "epoch": 1.02, "learning_rate": 3.659952606635071e-07, "logits/generated": 6.5625457763671875, "logits/real": 4.97821044921875, "logps/generated": -287.527099609375, "logps/real": -244.9476776123047, "loss": 0.1185, "rewards/accuracies": 0.925000011920929, "rewards/generated": -1.5237135887145996, "rewards/margins": 3.7011260986328125, "rewards/real": 2.177412509918213, "step": 1600 }, { "epoch": 1.03, "learning_rate": 3.648104265402843e-07, "logits/generated": 6.531602382659912, "logits/real": 5.296113014221191, "logps/generated": -283.7915954589844, "logps/real": -241.6884765625, "loss": 0.1083, "rewards/accuracies": 0.9375, "rewards/generated": -1.42214834690094, "rewards/margins": 3.4255123138427734, "rewards/real": 2.003364086151123, "step": 1610 }, { "epoch": 1.04, "learning_rate": 3.636255924170616e-07, "logits/generated": 6.558846473693848, "logits/real": 4.663855075836182, "logps/generated": -295.4413757324219, "logps/real": -231.3755340576172, "loss": 0.1392, "rewards/accuracies": 0.925000011920929, "rewards/generated": -1.4816911220550537, "rewards/margins": 3.7079098224639893, "rewards/real": 2.2262187004089355, "step": 1620 }, { "epoch": 1.04, "learning_rate": 3.6244075829383886e-07, "logits/generated": 6.471614837646484, "logits/real": 4.710072994232178, "logps/generated": -288.50982666015625, "logps/real": -242.4681854248047, "loss": 0.133, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.2834060192108154, "rewards/margins": 3.245429277420044, "rewards/real": 1.962023377418518, "step": 1630 }, { "epoch": 1.05, "learning_rate": 3.6125592417061613e-07, "logits/generated": 6.597050666809082, "logits/real": 4.699416160583496, "logps/generated": -309.9823303222656, "logps/real": -241.0970001220703, "loss": 0.1212, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.800374984741211, "rewards/margins": 3.8445522785186768, "rewards/real": 2.044177532196045, "step": 1640 }, { "epoch": 1.06, "learning_rate": 3.6007109004739335e-07, "logits/generated": 6.396651744842529, "logits/real": 4.873335838317871, "logps/generated": -302.5182800292969, "logps/real": -225.38162231445312, "loss": 0.096, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.7537730932235718, "rewards/margins": 3.5806782245635986, "rewards/real": 1.8269050121307373, "step": 1650 }, { "epoch": 1.06, "learning_rate": 3.588862559241706e-07, "logits/generated": 6.512624263763428, "logits/real": 4.873417854309082, "logps/generated": -310.1457824707031, "logps/real": -221.52127075195312, "loss": 0.1038, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.995270013809204, "rewards/margins": 3.832683563232422, "rewards/real": 1.8374135494232178, "step": 1660 }, { "epoch": 1.07, "learning_rate": 3.5770142180094784e-07, "logits/generated": 6.3861165046691895, "logits/real": 4.902550220489502, "logps/generated": -309.2914123535156, "logps/real": -254.0144805908203, "loss": 0.0882, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.0285847187042236, "rewards/margins": 4.352484703063965, "rewards/real": 2.323899745941162, "step": 1670 }, { "epoch": 1.07, "learning_rate": 3.5651658767772506e-07, "logits/generated": 6.3969197273254395, "logits/real": 4.497953414916992, "logps/generated": -282.2160949707031, "logps/real": -221.9111328125, "loss": 0.1083, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.500212550163269, "rewards/margins": 3.4251670837402344, "rewards/real": 1.9249544143676758, "step": 1680 }, { "epoch": 1.08, "learning_rate": 3.5533175355450234e-07, "logits/generated": 6.489283084869385, "logits/real": 5.103555202484131, "logps/generated": -296.80084228515625, "logps/real": -254.9130859375, "loss": 0.1188, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.7659108638763428, "rewards/margins": 3.7144954204559326, "rewards/real": 1.948584794998169, "step": 1690 }, { "epoch": 1.09, "learning_rate": 3.541469194312796e-07, "logits/generated": 6.5424089431762695, "logits/real": 4.5776543617248535, "logps/generated": -306.41778564453125, "logps/real": -229.9923553466797, "loss": 0.0874, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.5843690633773804, "rewards/margins": 3.7243666648864746, "rewards/real": 2.139997959136963, "step": 1700 }, { "epoch": 1.09, "learning_rate": 3.529620853080569e-07, "logits/generated": 6.53662109375, "logits/real": 4.515005588531494, "logps/generated": -281.1587829589844, "logps/real": -198.87599182128906, "loss": 0.1229, "rewards/accuracies": 0.9375, "rewards/generated": -1.793796181678772, "rewards/margins": 3.6370015144348145, "rewards/real": 1.8432050943374634, "step": 1710 }, { "epoch": 1.1, "learning_rate": 3.517772511848341e-07, "logits/generated": 6.402149200439453, "logits/real": 5.097185134887695, "logps/generated": -300.73248291015625, "logps/real": -269.5830078125, "loss": 0.0861, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.8012573719024658, "rewards/margins": 4.034179210662842, "rewards/real": 2.232922077178955, "step": 1720 }, { "epoch": 1.11, "learning_rate": 3.505924170616114e-07, "logits/generated": 6.531800746917725, "logits/real": 4.443795204162598, "logps/generated": -297.3340759277344, "logps/real": -225.61459350585938, "loss": 0.0949, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.9287163019180298, "rewards/margins": 4.080899715423584, "rewards/real": 2.1521835327148438, "step": 1730 }, { "epoch": 1.11, "learning_rate": 3.4940758293838865e-07, "logits/generated": 6.565011501312256, "logits/real": 4.78118371963501, "logps/generated": -300.0439453125, "logps/real": -232.56979370117188, "loss": 0.0998, "rewards/accuracies": 1.0, "rewards/generated": -1.7687839269638062, "rewards/margins": 3.690317153930664, "rewards/real": 1.921533226966858, "step": 1740 }, { "epoch": 1.12, "learning_rate": 3.482227488151658e-07, "logits/generated": 6.521812438964844, "logits/real": 4.503942489624023, "logps/generated": -304.3191223144531, "logps/real": -216.3659210205078, "loss": 0.105, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.9444500207901, "rewards/margins": 4.122984886169434, "rewards/real": 2.178535223007202, "step": 1750 }, { "epoch": 1.13, "learning_rate": 3.470379146919431e-07, "logits/generated": 6.601067543029785, "logits/real": 4.580229759216309, "logps/generated": -304.520751953125, "logps/real": -218.858154296875, "loss": 0.1114, "rewards/accuracies": 0.9375, "rewards/generated": -1.790876030921936, "rewards/margins": 3.8865456581115723, "rewards/real": 2.0956692695617676, "step": 1760 }, { "epoch": 1.13, "learning_rate": 3.4585308056872036e-07, "logits/generated": 6.4673662185668945, "logits/real": 4.512519836425781, "logps/generated": -309.33563232421875, "logps/real": -237.4860076904297, "loss": 0.0852, "rewards/accuracies": 1.0, "rewards/generated": -2.074862003326416, "rewards/margins": 4.4387288093566895, "rewards/real": 2.3638668060302734, "step": 1770 }, { "epoch": 1.14, "learning_rate": 3.4466824644549763e-07, "logits/generated": 6.52877140045166, "logits/real": 4.660586833953857, "logps/generated": -292.84735107421875, "logps/real": -235.4850616455078, "loss": 0.0748, "rewards/accuracies": 1.0, "rewards/generated": -1.910387635231018, "rewards/margins": 4.243655204772949, "rewards/real": 2.3332676887512207, "step": 1780 }, { "epoch": 1.15, "learning_rate": 3.4348341232227485e-07, "logits/generated": 6.508902549743652, "logits/real": 5.103868007659912, "logps/generated": -309.3510437011719, "logps/real": -261.96673583984375, "loss": 0.1137, "rewards/accuracies": 0.987500011920929, "rewards/generated": -1.9301677942276, "rewards/margins": 3.7662911415100098, "rewards/real": 1.8361234664916992, "step": 1790 }, { "epoch": 1.15, "learning_rate": 3.422985781990521e-07, "logits/generated": 6.578322410583496, "logits/real": 4.619819641113281, "logps/generated": -309.3769836425781, "logps/real": -230.23489379882812, "loss": 0.0928, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.1213955879211426, "rewards/margins": 4.048805236816406, "rewards/real": 1.9274095296859741, "step": 1800 }, { "epoch": 1.16, "learning_rate": 3.411137440758294e-07, "logits/generated": 6.638535499572754, "logits/real": 4.866148948669434, "logps/generated": -297.40826416015625, "logps/real": -226.35610961914062, "loss": 0.0875, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.079007625579834, "rewards/margins": 4.372213840484619, "rewards/real": 2.293206214904785, "step": 1810 }, { "epoch": 1.16, "learning_rate": 3.3992890995260667e-07, "logits/generated": 6.590002536773682, "logits/real": 4.547215461730957, "logps/generated": -288.596435546875, "logps/real": -198.56236267089844, "loss": 0.0912, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.8445419073104858, "rewards/margins": 3.7398104667663574, "rewards/real": 1.8952690362930298, "step": 1820 }, { "epoch": 1.17, "learning_rate": 3.3874407582938384e-07, "logits/generated": 6.5464019775390625, "logits/real": 4.549433708190918, "logps/generated": -316.94287109375, "logps/real": -230.5665283203125, "loss": 0.0736, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.1460461616516113, "rewards/margins": 4.4059224128723145, "rewards/real": 2.259875774383545, "step": 1830 }, { "epoch": 1.18, "learning_rate": 3.375592417061611e-07, "logits/generated": 6.43328332901001, "logits/real": 4.823711395263672, "logps/generated": -301.39202880859375, "logps/real": -236.8686065673828, "loss": 0.0918, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.8650691509246826, "rewards/margins": 4.1491899490356445, "rewards/real": 2.284120559692383, "step": 1840 }, { "epoch": 1.18, "learning_rate": 3.363744075829384e-07, "logits/generated": 6.538866996765137, "logits/real": 4.6786723136901855, "logps/generated": -291.01483154296875, "logps/real": -196.77285766601562, "loss": 0.0739, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.9935919046401978, "rewards/margins": 4.511776447296143, "rewards/real": 2.518184185028076, "step": 1850 }, { "epoch": 1.19, "learning_rate": 3.351895734597156e-07, "logits/generated": 6.427986145019531, "logits/real": 4.777734279632568, "logps/generated": -291.7196350097656, "logps/real": -219.09597778320312, "loss": 0.1, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.9151489734649658, "rewards/margins": 3.943358898162842, "rewards/real": 2.028210163116455, "step": 1860 }, { "epoch": 1.2, "learning_rate": 3.340047393364929e-07, "logits/generated": 6.557374477386475, "logits/real": 4.517666339874268, "logps/generated": -293.89178466796875, "logps/real": -231.68222045898438, "loss": 0.0845, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.155890703201294, "rewards/margins": 4.3398518562316895, "rewards/real": 2.1839611530303955, "step": 1870 }, { "epoch": 1.2, "learning_rate": 3.3281990521327015e-07, "logits/generated": 6.494599342346191, "logits/real": 4.4522833824157715, "logps/generated": -309.76556396484375, "logps/real": -219.76806640625, "loss": 0.0856, "rewards/accuracies": 1.0, "rewards/generated": -2.3567099571228027, "rewards/margins": 4.560822486877441, "rewards/real": 2.2041122913360596, "step": 1880 }, { "epoch": 1.21, "learning_rate": 3.316350710900474e-07, "logits/generated": 6.508890628814697, "logits/real": 4.725305080413818, "logps/generated": -294.90533447265625, "logps/real": -225.0491180419922, "loss": 0.1116, "rewards/accuracies": 0.949999988079071, "rewards/generated": -1.9600846767425537, "rewards/margins": 3.896559476852417, "rewards/real": 1.9364748001098633, "step": 1890 }, { "epoch": 1.22, "learning_rate": 3.304502369668246e-07, "logits/generated": 6.483962059020996, "logits/real": 4.7496185302734375, "logps/generated": -294.6922607421875, "logps/real": -234.5235137939453, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/generated": -2.0787155628204346, "rewards/margins": 4.212123394012451, "rewards/real": 2.1334080696105957, "step": 1900 }, { "epoch": 1.22, "learning_rate": 3.2926540284360186e-07, "logits/generated": 6.553183078765869, "logits/real": 4.5973711013793945, "logps/generated": -290.69122314453125, "logps/real": -224.2595977783203, "loss": 0.0786, "rewards/accuracies": 1.0, "rewards/generated": -2.1069176197052, "rewards/margins": 4.262394905090332, "rewards/real": 2.155477523803711, "step": 1910 }, { "epoch": 1.23, "learning_rate": 3.2808056872037913e-07, "logits/generated": 6.475919246673584, "logits/real": 4.781458854675293, "logps/generated": -293.8143615722656, "logps/real": -242.02157592773438, "loss": 0.0815, "rewards/accuracies": 1.0, "rewards/generated": -2.0358715057373047, "rewards/margins": 4.052926063537598, "rewards/real": 2.017054796218872, "step": 1920 }, { "epoch": 1.23, "learning_rate": 3.2689573459715635e-07, "logits/generated": 6.587837219238281, "logits/real": 4.240110874176025, "logps/generated": -300.80084228515625, "logps/real": -197.26913452148438, "loss": 0.0862, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.1164047718048096, "rewards/margins": 4.440495014190674, "rewards/real": 2.3240902423858643, "step": 1930 }, { "epoch": 1.24, "learning_rate": 3.2571090047393363e-07, "logits/generated": 6.373067855834961, "logits/real": 4.968833923339844, "logps/generated": -313.5643005371094, "logps/real": -238.37448120117188, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/generated": -2.1593971252441406, "rewards/margins": 4.199219703674316, "rewards/real": 2.0398221015930176, "step": 1940 }, { "epoch": 1.25, "learning_rate": 3.245260663507109e-07, "logits/generated": 6.488961219787598, "logits/real": 4.785304069519043, "logps/generated": -305.1136474609375, "logps/real": -226.7467498779297, "loss": 0.0907, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.0184876918792725, "rewards/margins": 4.260189056396484, "rewards/real": 2.241701126098633, "step": 1950 }, { "epoch": 1.25, "learning_rate": 3.2334123222748817e-07, "logits/generated": 6.571511745452881, "logits/real": 4.719809055328369, "logps/generated": -288.8949279785156, "logps/real": -227.11788940429688, "loss": 0.0826, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.8733266592025757, "rewards/margins": 4.297513484954834, "rewards/real": 2.424187183380127, "step": 1960 }, { "epoch": 1.26, "learning_rate": 3.221563981042654e-07, "logits/generated": 6.46596622467041, "logits/real": 5.228185653686523, "logps/generated": -317.7358703613281, "logps/real": -263.435791015625, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.4127206802368164, "rewards/margins": 4.111409664154053, "rewards/real": 1.6986888647079468, "step": 1970 }, { "epoch": 1.27, "learning_rate": 3.209715639810426e-07, "logits/generated": 6.481564998626709, "logits/real": 4.614343166351318, "logps/generated": -310.3409729003906, "logps/real": -232.2714385986328, "loss": 0.0894, "rewards/accuracies": 0.9375, "rewards/generated": -2.1927568912506104, "rewards/margins": 4.123000144958496, "rewards/real": 1.930242896080017, "step": 1980 }, { "epoch": 1.27, "learning_rate": 3.197867298578199e-07, "logits/generated": 6.458526611328125, "logits/real": 4.698050498962402, "logps/generated": -300.57391357421875, "logps/real": -227.27169799804688, "loss": 0.0675, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.231247901916504, "rewards/margins": 4.3048787117004395, "rewards/real": 2.0736308097839355, "step": 1990 }, { "epoch": 1.28, "learning_rate": 3.186018957345971e-07, "logits/generated": 6.454297065734863, "logits/real": 4.6443586349487305, "logps/generated": -299.6277160644531, "logps/real": -228.95370483398438, "loss": 0.0791, "rewards/accuracies": 1.0, "rewards/generated": -2.205613613128662, "rewards/margins": 4.489793300628662, "rewards/real": 2.2841796875, "step": 2000 }, { "epoch": 1.29, "learning_rate": 3.174170616113744e-07, "logits/generated": 6.567930698394775, "logits/real": 4.493962287902832, "logps/generated": -296.0806579589844, "logps/real": -206.06979370117188, "loss": 0.0601, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.3037962913513184, "rewards/margins": 4.68963623046875, "rewards/real": 2.3858392238616943, "step": 2010 }, { "epoch": 1.29, "learning_rate": 3.1623222748815165e-07, "logits/generated": 6.372540473937988, "logits/real": 4.720137596130371, "logps/generated": -283.2089538574219, "logps/real": -228.1040802001953, "loss": 0.0764, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.127713918685913, "rewards/margins": 4.436669826507568, "rewards/real": 2.3089561462402344, "step": 2020 }, { "epoch": 1.3, "learning_rate": 3.150473933649289e-07, "logits/generated": 6.5518317222595215, "logits/real": 4.7907562255859375, "logps/generated": -305.01019287109375, "logps/real": -233.3303985595703, "loss": 0.0768, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.040151834487915, "rewards/margins": 4.2342705726623535, "rewards/real": 2.1941189765930176, "step": 2030 }, { "epoch": 1.31, "learning_rate": 3.1386255924170614e-07, "logits/generated": 6.603424072265625, "logits/real": 4.3683857917785645, "logps/generated": -288.2603759765625, "logps/real": -198.78309631347656, "loss": 0.0797, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.926896333694458, "rewards/margins": 4.150208950042725, "rewards/real": 2.2233126163482666, "step": 2040 }, { "epoch": 1.31, "learning_rate": 3.126777251184834e-07, "logits/generated": 6.489920616149902, "logits/real": 4.773646831512451, "logps/generated": -312.36236572265625, "logps/real": -234.63076782226562, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/generated": -2.2082715034484863, "rewards/margins": 4.475545883178711, "rewards/real": 2.2672739028930664, "step": 2050 }, { "epoch": 1.32, "learning_rate": 3.1149289099526064e-07, "logits/generated": 6.615997314453125, "logits/real": 4.535744667053223, "logps/generated": -283.09326171875, "logps/real": -217.5418701171875, "loss": 0.0765, "rewards/accuracies": 0.949999988079071, "rewards/generated": -2.2303497791290283, "rewards/margins": 4.6097259521484375, "rewards/real": 2.3793764114379883, "step": 2060 }, { "epoch": 1.32, "learning_rate": 3.103080568720379e-07, "logits/generated": 6.409584045410156, "logits/real": 4.447390556335449, "logps/generated": -290.5548095703125, "logps/real": -227.99038696289062, "loss": 0.0963, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.0898966789245605, "rewards/margins": 4.26753568649292, "rewards/real": 2.1776394844055176, "step": 2070 }, { "epoch": 1.33, "learning_rate": 3.0912322274881513e-07, "logits/generated": 6.556498050689697, "logits/real": 4.974642753601074, "logps/generated": -300.2599182128906, "logps/real": -241.9363555908203, "loss": 0.079, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.5023233890533447, "rewards/margins": 4.799649238586426, "rewards/real": 2.297325372695923, "step": 2080 }, { "epoch": 1.34, "learning_rate": 3.079383886255924e-07, "logits/generated": 6.597992897033691, "logits/real": 4.567930698394775, "logps/generated": -311.3286437988281, "logps/real": -219.1073455810547, "loss": 0.06, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.2045724391937256, "rewards/margins": 4.53138542175293, "rewards/real": 2.326812267303467, "step": 2090 }, { "epoch": 1.34, "learning_rate": 3.067535545023697e-07, "logits/generated": 6.559409141540527, "logits/real": 4.554577827453613, "logps/generated": -303.6340637207031, "logps/real": -208.86483764648438, "loss": 0.0687, "rewards/accuracies": 1.0, "rewards/generated": -2.4833426475524902, "rewards/margins": 4.425951957702637, "rewards/real": 1.9426090717315674, "step": 2100 }, { "epoch": 1.35, "learning_rate": 3.055687203791469e-07, "logits/generated": 6.533651828765869, "logits/real": 4.764289379119873, "logps/generated": -295.6280822753906, "logps/real": -224.78646850585938, "loss": 0.0814, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.3707690238952637, "rewards/margins": 4.8221025466918945, "rewards/real": 2.451333522796631, "step": 2110 }, { "epoch": 1.36, "learning_rate": 3.0438388625592417e-07, "logits/generated": 6.513457298278809, "logits/real": 4.793578147888184, "logps/generated": -311.8963928222656, "logps/real": -224.7337646484375, "loss": 0.0821, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.5289576053619385, "rewards/margins": 4.804759502410889, "rewards/real": 2.2758023738861084, "step": 2120 }, { "epoch": 1.36, "learning_rate": 3.0319905213270144e-07, "logits/generated": 6.621163368225098, "logits/real": 4.561090469360352, "logps/generated": -303.7142028808594, "logps/real": -222.3753662109375, "loss": 0.0836, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.326003074645996, "rewards/margins": 4.459982872009277, "rewards/real": 2.1339797973632812, "step": 2130 }, { "epoch": 1.37, "learning_rate": 3.0201421800947866e-07, "logits/generated": 6.580770969390869, "logits/real": 4.773768901824951, "logps/generated": -311.6327209472656, "logps/real": -249.0811309814453, "loss": 0.0835, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.3817009925842285, "rewards/margins": 4.315882682800293, "rewards/real": 1.934181809425354, "step": 2140 }, { "epoch": 1.38, "learning_rate": 3.008293838862559e-07, "logits/generated": 6.521528720855713, "logits/real": 4.7063798904418945, "logps/generated": -294.9180908203125, "logps/real": -223.62643432617188, "loss": 0.1009, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.208840847015381, "rewards/margins": 4.619427680969238, "rewards/real": 2.4105873107910156, "step": 2150 }, { "epoch": 1.38, "learning_rate": 2.9964454976303315e-07, "logits/generated": 6.510763645172119, "logits/real": 4.770654201507568, "logps/generated": -306.3185729980469, "logps/real": -223.23654174804688, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/generated": -2.2273476123809814, "rewards/margins": 4.425220966339111, "rewards/real": 2.197873592376709, "step": 2160 }, { "epoch": 1.39, "learning_rate": 2.984597156398104e-07, "logits/generated": 6.569235801696777, "logits/real": 4.330723762512207, "logps/generated": -300.669677734375, "logps/real": -213.9007568359375, "loss": 0.0789, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.5640532970428467, "rewards/margins": 4.792849540710449, "rewards/real": 2.2287967205047607, "step": 2170 }, { "epoch": 1.39, "learning_rate": 2.9727488151658765e-07, "logits/generated": 6.663917541503906, "logits/real": 4.72244119644165, "logps/generated": -314.7903747558594, "logps/real": -252.47933959960938, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/generated": -2.60965895652771, "rewards/margins": 4.690103054046631, "rewards/real": 2.080443859100342, "step": 2180 }, { "epoch": 1.4, "learning_rate": 2.960900473933649e-07, "logits/generated": 6.465506553649902, "logits/real": 5.010118007659912, "logps/generated": -297.9461975097656, "logps/real": -224.8966522216797, "loss": 0.0514, "rewards/accuracies": 1.0, "rewards/generated": -2.0201261043548584, "rewards/margins": 4.045393943786621, "rewards/real": 2.025268077850342, "step": 2190 }, { "epoch": 1.41, "learning_rate": 2.949052132701422e-07, "logits/generated": 6.5695037841796875, "logits/real": 4.864360332489014, "logps/generated": -300.3662414550781, "logps/real": -243.6785888671875, "loss": 0.0647, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.213184118270874, "rewards/margins": 4.579488754272461, "rewards/real": 2.366304397583008, "step": 2200 }, { "epoch": 1.41, "learning_rate": 2.9372037914691946e-07, "logits/generated": 6.518437385559082, "logits/real": 4.748892784118652, "logps/generated": -309.49658203125, "logps/real": -216.10751342773438, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/generated": -2.4229207038879395, "rewards/margins": 4.442441463470459, "rewards/real": 2.0195205211639404, "step": 2210 }, { "epoch": 1.42, "learning_rate": 2.9253554502369663e-07, "logits/generated": 6.526637077331543, "logits/real": 4.67498779296875, "logps/generated": -313.6875, "logps/real": -231.32199096679688, "loss": 0.0762, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.6978516578674316, "rewards/margins": 4.7451090812683105, "rewards/real": 2.0472571849823, "step": 2220 }, { "epoch": 1.43, "learning_rate": 2.913507109004739e-07, "logits/generated": 6.5197906494140625, "logits/real": 4.786774635314941, "logps/generated": -300.3356628417969, "logps/real": -238.114501953125, "loss": 0.07, "rewards/accuracies": 1.0, "rewards/generated": -2.6963155269622803, "rewards/margins": 5.061102867126465, "rewards/real": 2.3647871017456055, "step": 2230 }, { "epoch": 1.43, "learning_rate": 2.901658767772512e-07, "logits/generated": 6.281010627746582, "logits/real": 4.850646495819092, "logps/generated": -311.21044921875, "logps/real": -224.34005737304688, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/generated": -2.614072561264038, "rewards/margins": 4.778448104858398, "rewards/real": 2.1643755435943604, "step": 2240 }, { "epoch": 1.44, "learning_rate": 2.889810426540284e-07, "logits/generated": 6.2719526290893555, "logits/real": 4.559032440185547, "logps/generated": -300.2083435058594, "logps/real": -220.322998046875, "loss": 0.0626, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.7301433086395264, "rewards/margins": 5.041817665100098, "rewards/real": 2.311674118041992, "step": 2250 }, { "epoch": 1.45, "learning_rate": 2.8779620853080567e-07, "logits/generated": 6.567207336425781, "logits/real": 4.634784698486328, "logps/generated": -306.6617736816406, "logps/real": -223.53744506835938, "loss": 0.0634, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.5135562419891357, "rewards/margins": 4.631071090698242, "rewards/real": 2.1175150871276855, "step": 2260 }, { "epoch": 1.45, "learning_rate": 2.8661137440758294e-07, "logits/generated": 6.572225093841553, "logits/real": 5.128222465515137, "logps/generated": -305.1805419921875, "logps/real": -244.2073974609375, "loss": 0.0913, "rewards/accuracies": 0.949999988079071, "rewards/generated": -2.5788116455078125, "rewards/margins": 4.599055767059326, "rewards/real": 2.0202441215515137, "step": 2270 }, { "epoch": 1.46, "learning_rate": 2.854265402843602e-07, "logits/generated": 6.486606597900391, "logits/real": 4.381880283355713, "logps/generated": -299.26226806640625, "logps/real": -204.21128845214844, "loss": 0.0607, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.690892219543457, "rewards/margins": 4.859349727630615, "rewards/real": 2.168457508087158, "step": 2280 }, { "epoch": 1.47, "learning_rate": 2.842417061611374e-07, "logits/generated": 6.467171669006348, "logits/real": 4.881519317626953, "logps/generated": -316.6208801269531, "logps/real": -249.66964721679688, "loss": 0.0561, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.9214892387390137, "rewards/margins": 4.928795337677002, "rewards/real": 2.0073063373565674, "step": 2290 }, { "epoch": 1.47, "learning_rate": 2.8305687203791465e-07, "logits/generated": 6.373992443084717, "logits/real": 4.881070613861084, "logps/generated": -304.5431823730469, "logps/real": -238.75918579101562, "loss": 0.0637, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.7934460639953613, "rewards/margins": 5.073416709899902, "rewards/real": 2.279970645904541, "step": 2300 }, { "epoch": 1.48, "learning_rate": 2.8187203791469193e-07, "logits/generated": 6.459514617919922, "logits/real": 4.823994159698486, "logps/generated": -297.67156982421875, "logps/real": -244.6254425048828, "loss": 0.07, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.617419958114624, "rewards/margins": 4.747994422912598, "rewards/real": 2.1305739879608154, "step": 2310 }, { "epoch": 1.48, "learning_rate": 2.806872037914692e-07, "logits/generated": 6.545841217041016, "logits/real": 4.525175094604492, "logps/generated": -300.8380126953125, "logps/real": -230.91641235351562, "loss": 0.0413, "rewards/accuracies": 1.0, "rewards/generated": -2.621128797531128, "rewards/margins": 4.8851799964904785, "rewards/real": 2.2640514373779297, "step": 2320 }, { "epoch": 1.49, "learning_rate": 2.795023696682464e-07, "logits/generated": 6.552148342132568, "logits/real": 4.585801601409912, "logps/generated": -291.0857238769531, "logps/real": -203.3738250732422, "loss": 0.0633, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.501784324645996, "rewards/margins": 4.606094837188721, "rewards/real": 2.1043105125427246, "step": 2330 }, { "epoch": 1.5, "learning_rate": 2.783175355450237e-07, "logits/generated": 6.5855712890625, "logits/real": 4.8127546310424805, "logps/generated": -304.4456787109375, "logps/real": -241.84164428710938, "loss": 0.0662, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.4264376163482666, "rewards/margins": 4.380554676055908, "rewards/real": 1.9541168212890625, "step": 2340 }, { "epoch": 1.5, "learning_rate": 2.7713270142180097e-07, "logits/generated": 6.61553955078125, "logits/real": 4.842226982116699, "logps/generated": -304.24908447265625, "logps/real": -228.1985321044922, "loss": 0.0723, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.668332815170288, "rewards/margins": 4.606504917144775, "rewards/real": 1.9381721019744873, "step": 2350 }, { "epoch": 1.51, "learning_rate": 2.759478672985782e-07, "logits/generated": 6.651033878326416, "logits/real": 5.216360569000244, "logps/generated": -299.9844665527344, "logps/real": -245.30838012695312, "loss": 0.058, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.7325878143310547, "rewards/margins": 4.621773719787598, "rewards/real": 1.8891855478286743, "step": 2360 }, { "epoch": 1.52, "learning_rate": 2.747630331753554e-07, "logits/generated": 6.4999237060546875, "logits/real": 4.944417476654053, "logps/generated": -296.3262023925781, "logps/real": -230.99472045898438, "loss": 0.0709, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.5117669105529785, "rewards/margins": 4.533594131469727, "rewards/real": 2.021827220916748, "step": 2370 }, { "epoch": 1.52, "learning_rate": 2.735781990521327e-07, "logits/generated": 6.586300849914551, "logits/real": 4.376750946044922, "logps/generated": -308.04730224609375, "logps/real": -199.63783264160156, "loss": 0.0526, "rewards/accuracies": 1.0, "rewards/generated": -2.739675521850586, "rewards/margins": 5.009511947631836, "rewards/real": 2.26983642578125, "step": 2380 }, { "epoch": 1.53, "learning_rate": 2.7239336492890995e-07, "logits/generated": 6.62445592880249, "logits/real": 4.475614547729492, "logps/generated": -299.97686767578125, "logps/real": -205.2351837158203, "loss": 0.0631, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.6585028171539307, "rewards/margins": 4.911370277404785, "rewards/real": 2.2528672218322754, "step": 2390 }, { "epoch": 1.54, "learning_rate": 2.7120853080568717e-07, "logits/generated": 6.5361738204956055, "logits/real": 4.921416282653809, "logps/generated": -320.28619384765625, "logps/real": -235.4938507080078, "loss": 0.0686, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.5308923721313477, "rewards/margins": 4.577965259552002, "rewards/real": 2.047072649002075, "step": 2400 }, { "epoch": 1.54, "learning_rate": 2.7002369668246444e-07, "logits/generated": 6.583050727844238, "logits/real": 4.50087833404541, "logps/generated": -305.26544189453125, "logps/real": -205.6936492919922, "loss": 0.0646, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.650360584259033, "rewards/margins": 4.845761299133301, "rewards/real": 2.195401191711426, "step": 2410 }, { "epoch": 1.55, "learning_rate": 2.688388625592417e-07, "logits/generated": 6.528054714202881, "logits/real": 4.9675092697143555, "logps/generated": -287.01385498046875, "logps/real": -239.06137084960938, "loss": 0.0695, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.433396577835083, "rewards/margins": 4.63083553314209, "rewards/real": 2.1974387168884277, "step": 2420 }, { "epoch": 1.55, "learning_rate": 2.6765402843601894e-07, "logits/generated": 6.572503089904785, "logits/real": 4.664327621459961, "logps/generated": -299.9050598144531, "logps/real": -226.3813934326172, "loss": 0.0589, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.615846633911133, "rewards/margins": 4.903604030609131, "rewards/real": 2.287757396697998, "step": 2430 }, { "epoch": 1.56, "learning_rate": 2.664691943127962e-07, "logits/generated": 6.500199794769287, "logits/real": 4.409898281097412, "logps/generated": -291.48956298828125, "logps/real": -221.5597686767578, "loss": 0.0677, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.5167105197906494, "rewards/margins": 4.5962815284729, "rewards/real": 2.079570770263672, "step": 2440 }, { "epoch": 1.57, "learning_rate": 2.6528436018957343e-07, "logits/generated": 6.451045036315918, "logits/real": 4.783955097198486, "logps/generated": -297.8843688964844, "logps/real": -234.1428985595703, "loss": 0.0697, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.9517464637756348, "rewards/margins": 5.131245136260986, "rewards/real": 2.1794984340667725, "step": 2450 }, { "epoch": 1.57, "learning_rate": 2.640995260663507e-07, "logits/generated": 6.4251837730407715, "logits/real": 4.974338054656982, "logps/generated": -286.2950134277344, "logps/real": -229.6946258544922, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/generated": -2.7505643367767334, "rewards/margins": 4.802645683288574, "rewards/real": 2.05208158493042, "step": 2460 }, { "epoch": 1.58, "learning_rate": 2.629146919431279e-07, "logits/generated": 6.499301910400391, "logits/real": 4.888270378112793, "logps/generated": -313.7422180175781, "logps/real": -248.39376831054688, "loss": 0.0575, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.546126127243042, "rewards/margins": 4.463229179382324, "rewards/real": 1.9171028137207031, "step": 2470 }, { "epoch": 1.59, "learning_rate": 2.617298578199052e-07, "logits/generated": 6.561995506286621, "logits/real": 4.65811014175415, "logps/generated": -311.327392578125, "logps/real": -216.3298797607422, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/generated": -2.7726848125457764, "rewards/margins": 4.696637153625488, "rewards/real": 1.923952341079712, "step": 2480 }, { "epoch": 1.59, "learning_rate": 2.6054502369668247e-07, "logits/generated": 6.564155578613281, "logits/real": 4.5207109451293945, "logps/generated": -285.0248107910156, "logps/real": -203.79513549804688, "loss": 0.0668, "rewards/accuracies": 0.949999988079071, "rewards/generated": -2.5576417446136475, "rewards/margins": 4.535670280456543, "rewards/real": 1.978028655052185, "step": 2490 }, { "epoch": 1.6, "learning_rate": 2.5936018957345974e-07, "logits/generated": 6.515015602111816, "logits/real": 4.583956241607666, "logps/generated": -298.4903869628906, "logps/real": -232.15805053710938, "loss": 0.0499, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.533477544784546, "rewards/margins": 4.886750221252441, "rewards/real": 2.353271961212158, "step": 2500 }, { "epoch": 1.61, "learning_rate": 2.5817535545023696e-07, "logits/generated": 6.423288822174072, "logits/real": 5.059484958648682, "logps/generated": -324.56365966796875, "logps/real": -248.84371948242188, "loss": 0.0654, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.873297691345215, "rewards/margins": 5.0330986976623535, "rewards/real": 2.1598012447357178, "step": 2510 }, { "epoch": 1.61, "learning_rate": 2.5699052132701423e-07, "logits/generated": 6.517401218414307, "logits/real": 5.171728134155273, "logps/generated": -307.1355285644531, "logps/real": -257.387451171875, "loss": 0.0434, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.787299633026123, "rewards/margins": 4.657351016998291, "rewards/real": 1.8700507879257202, "step": 2520 }, { "epoch": 1.62, "learning_rate": 2.5580568720379145e-07, "logits/generated": 6.6013922691345215, "logits/real": 4.844089984893799, "logps/generated": -297.20379638671875, "logps/real": -221.24124145507812, "loss": 0.0468, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.8276898860931396, "rewards/margins": 4.962311744689941, "rewards/real": 2.1346216201782227, "step": 2530 }, { "epoch": 1.63, "learning_rate": 2.5462085308056867e-07, "logits/generated": 6.696247100830078, "logits/real": 4.772692680358887, "logps/generated": -315.21832275390625, "logps/real": -226.9287567138672, "loss": 0.0455, "rewards/accuracies": 1.0, "rewards/generated": -2.7525386810302734, "rewards/margins": 4.851990222930908, "rewards/real": 2.0994513034820557, "step": 2540 }, { "epoch": 1.63, "learning_rate": 2.5343601895734595e-07, "logits/generated": 6.510929107666016, "logits/real": 4.709242343902588, "logps/generated": -306.0609436035156, "logps/real": -211.63021850585938, "loss": 0.0599, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.842668056488037, "rewards/margins": 4.8463568687438965, "rewards/real": 2.0036888122558594, "step": 2550 }, { "epoch": 1.64, "learning_rate": 2.522511848341232e-07, "logits/generated": 6.442812442779541, "logits/real": 4.947168827056885, "logps/generated": -322.0133056640625, "logps/real": -240.52841186523438, "loss": 0.0578, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.7832953929901123, "rewards/margins": 4.885197639465332, "rewards/real": 2.101902484893799, "step": 2560 }, { "epoch": 1.64, "learning_rate": 2.510663507109005e-07, "logits/generated": 6.514994144439697, "logits/real": 4.442912578582764, "logps/generated": -313.75628662109375, "logps/real": -222.9632568359375, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/generated": -2.991903305053711, "rewards/margins": 5.337101936340332, "rewards/real": 2.3451991081237793, "step": 2570 }, { "epoch": 1.65, "learning_rate": 2.498815165876777e-07, "logits/generated": 6.605074405670166, "logits/real": 4.601679801940918, "logps/generated": -310.7535705566406, "logps/real": -230.5736541748047, "loss": 0.0501, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.7398934364318848, "rewards/margins": 5.229077339172363, "rewards/real": 2.4891839027404785, "step": 2580 }, { "epoch": 1.66, "learning_rate": 2.48696682464455e-07, "logits/generated": 6.653934478759766, "logits/real": 5.141819953918457, "logps/generated": -328.99005126953125, "logps/real": -241.0696563720703, "loss": 0.0697, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.0767884254455566, "rewards/margins": 5.069361686706543, "rewards/real": 1.9925737380981445, "step": 2590 }, { "epoch": 1.66, "learning_rate": 2.475118483412322e-07, "logits/generated": 6.622979164123535, "logits/real": 5.017902374267578, "logps/generated": -309.37762451171875, "logps/real": -230.4441375732422, "loss": 0.0612, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.974555253982544, "rewards/margins": 5.106658935546875, "rewards/real": 2.13210391998291, "step": 2600 }, { "epoch": 1.67, "learning_rate": 2.463270142180095e-07, "logits/generated": 6.475897312164307, "logits/real": 4.987759590148926, "logps/generated": -323.8994140625, "logps/real": -236.59725952148438, "loss": 0.0486, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.926840305328369, "rewards/margins": 5.029897212982178, "rewards/real": 2.1030571460723877, "step": 2610 }, { "epoch": 1.68, "learning_rate": 2.451421800947867e-07, "logits/generated": 6.513401985168457, "logits/real": 4.884097576141357, "logps/generated": -295.1630554199219, "logps/real": -219.2560272216797, "loss": 0.0509, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.579526662826538, "rewards/margins": 4.818698883056641, "rewards/real": 2.2391719818115234, "step": 2620 }, { "epoch": 1.68, "learning_rate": 2.4395734597156397e-07, "logits/generated": 6.569561958312988, "logits/real": 4.7536301612854, "logps/generated": -314.96917724609375, "logps/real": -241.3905487060547, "loss": 0.0581, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.9935827255249023, "rewards/margins": 5.2356743812561035, "rewards/real": 2.242091655731201, "step": 2630 }, { "epoch": 1.69, "learning_rate": 2.4277251184834124e-07, "logits/generated": 6.643240451812744, "logits/real": 4.672991752624512, "logps/generated": -298.296630859375, "logps/real": -218.02627563476562, "loss": 0.0567, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.751802921295166, "rewards/margins": 4.864652156829834, "rewards/real": 2.112849473953247, "step": 2640 }, { "epoch": 1.7, "learning_rate": 2.4158767772511846e-07, "logits/generated": 6.543112277984619, "logits/real": 4.794711112976074, "logps/generated": -300.0271911621094, "logps/real": -229.9580535888672, "loss": 0.0449, "rewards/accuracies": 1.0, "rewards/generated": -2.9769158363342285, "rewards/margins": 5.159039497375488, "rewards/real": 2.182124137878418, "step": 2650 }, { "epoch": 1.7, "learning_rate": 2.4040284360189573e-07, "logits/generated": 6.426865577697754, "logits/real": 5.070995807647705, "logps/generated": -312.7875061035156, "logps/real": -244.02859497070312, "loss": 0.0482, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.957658529281616, "rewards/margins": 4.830506801605225, "rewards/real": 1.8728487491607666, "step": 2660 }, { "epoch": 1.71, "learning_rate": 2.39218009478673e-07, "logits/generated": 6.447142601013184, "logits/real": 4.88741397857666, "logps/generated": -316.395751953125, "logps/real": -238.98989868164062, "loss": 0.0624, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.3095619678497314, "rewards/margins": 5.356574535369873, "rewards/real": 2.0470128059387207, "step": 2670 }, { "epoch": 1.71, "learning_rate": 2.3803317535545023e-07, "logits/generated": 6.639400482177734, "logits/real": 4.574619293212891, "logps/generated": -311.22705078125, "logps/real": -213.94375610351562, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/generated": -2.9241037368774414, "rewards/margins": 5.196986675262451, "rewards/real": 2.2728826999664307, "step": 2680 }, { "epoch": 1.72, "learning_rate": 2.3684834123222747e-07, "logits/generated": 6.604647159576416, "logits/real": 4.500279426574707, "logps/generated": -297.166015625, "logps/real": -226.86227416992188, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/generated": -3.1100852489471436, "rewards/margins": 5.564393520355225, "rewards/real": 2.45430850982666, "step": 2690 }, { "epoch": 1.73, "learning_rate": 2.3566350710900475e-07, "logits/generated": 6.377281665802002, "logits/real": 4.427682399749756, "logps/generated": -303.52667236328125, "logps/real": -228.33834838867188, "loss": 0.0493, "rewards/accuracies": 1.0, "rewards/generated": -2.9995968341827393, "rewards/margins": 5.4720869064331055, "rewards/real": 2.4724905490875244, "step": 2700 }, { "epoch": 1.73, "learning_rate": 2.3447867298578197e-07, "logits/generated": 6.596705436706543, "logits/real": 4.664139747619629, "logps/generated": -299.83526611328125, "logps/real": -237.1983184814453, "loss": 0.0687, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.115382432937622, "rewards/margins": 5.5024261474609375, "rewards/real": 2.3870432376861572, "step": 2710 }, { "epoch": 1.74, "learning_rate": 2.3329383886255924e-07, "logits/generated": 6.471650123596191, "logits/real": 4.98661470413208, "logps/generated": -323.77984619140625, "logps/real": -239.13720703125, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/generated": -3.5205624103546143, "rewards/margins": 6.0251383781433105, "rewards/real": 2.5045764446258545, "step": 2720 }, { "epoch": 1.75, "learning_rate": 2.3210900473933649e-07, "logits/generated": 6.571569919586182, "logits/real": 5.169942855834961, "logps/generated": -310.2748107910156, "logps/real": -243.19113159179688, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/generated": -3.2248611450195312, "rewards/margins": 5.1551642417907715, "rewards/real": 1.9303032159805298, "step": 2730 }, { "epoch": 1.75, "learning_rate": 2.3092417061611373e-07, "logits/generated": 6.645476341247559, "logits/real": 4.323509693145752, "logps/generated": -305.4945373535156, "logps/real": -211.2071990966797, "loss": 0.0508, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.021256923675537, "rewards/margins": 5.193289756774902, "rewards/real": 2.1720330715179443, "step": 2740 }, { "epoch": 1.76, "learning_rate": 2.2973933649289098e-07, "logits/generated": 6.557646751403809, "logits/real": 4.875040531158447, "logps/generated": -320.00830078125, "logps/real": -238.9925994873047, "loss": 0.042, "rewards/accuracies": 1.0, "rewards/generated": -3.284480333328247, "rewards/margins": 5.7841997146606445, "rewards/real": 2.4997196197509766, "step": 2750 }, { "epoch": 1.77, "learning_rate": 2.2855450236966822e-07, "logits/generated": 6.572206974029541, "logits/real": 4.644891262054443, "logps/generated": -314.3716735839844, "logps/real": -220.7062225341797, "loss": 0.0737, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.0081522464752197, "rewards/margins": 4.949704170227051, "rewards/real": 1.9415524005889893, "step": 2760 }, { "epoch": 1.77, "learning_rate": 2.273696682464455e-07, "logits/generated": 6.575781345367432, "logits/real": 4.972030162811279, "logps/generated": -304.5403137207031, "logps/real": -226.6322021484375, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/generated": -3.0886337757110596, "rewards/margins": 5.250014781951904, "rewards/real": 2.1613805294036865, "step": 2770 }, { "epoch": 1.78, "learning_rate": 2.2618483412322272e-07, "logits/generated": 6.253432273864746, "logits/real": 4.807991981506348, "logps/generated": -308.74737548828125, "logps/real": -265.22088623046875, "loss": 0.058, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.068493366241455, "rewards/margins": 5.278921127319336, "rewards/real": 2.2104272842407227, "step": 2780 }, { "epoch": 1.79, "learning_rate": 2.25e-07, "logits/generated": 6.551450252532959, "logits/real": 5.10826301574707, "logps/generated": -307.3768615722656, "logps/real": -237.50149536132812, "loss": 0.0522, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.9784510135650635, "rewards/margins": 4.849761962890625, "rewards/real": 1.8713109493255615, "step": 2790 }, { "epoch": 1.79, "learning_rate": 2.2381516587677724e-07, "logits/generated": 6.531739234924316, "logits/real": 4.827146530151367, "logps/generated": -314.5278015136719, "logps/real": -241.0078887939453, "loss": 0.0595, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.0112459659576416, "rewards/margins": 4.8616437911987305, "rewards/real": 1.8503978252410889, "step": 2800 }, { "epoch": 1.8, "learning_rate": 2.226303317535545e-07, "logits/generated": 6.410187721252441, "logits/real": 5.148890495300293, "logps/generated": -312.85565185546875, "logps/real": -238.3211212158203, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/generated": -3.0788676738739014, "rewards/margins": 5.32081413269043, "rewards/real": 2.2419466972351074, "step": 2810 }, { "epoch": 1.8, "learning_rate": 2.2144549763033173e-07, "logits/generated": 6.559216499328613, "logits/real": 4.562496662139893, "logps/generated": -326.8006591796875, "logps/real": -229.6642608642578, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/generated": -3.402270793914795, "rewards/margins": 5.84303092956543, "rewards/real": 2.4407601356506348, "step": 2820 }, { "epoch": 1.81, "learning_rate": 2.20260663507109e-07, "logits/generated": 6.503948211669922, "logits/real": 4.351350784301758, "logps/generated": -308.74688720703125, "logps/real": -216.2238006591797, "loss": 0.0504, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.0520529747009277, "rewards/margins": 5.5729241371154785, "rewards/real": 2.5208706855773926, "step": 2830 }, { "epoch": 1.82, "learning_rate": 2.1907582938388625e-07, "logits/generated": 6.486424922943115, "logits/real": 4.887758731842041, "logps/generated": -324.04327392578125, "logps/real": -245.8160400390625, "loss": 0.0484, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.8712515830993652, "rewards/margins": 5.168000221252441, "rewards/real": 2.296747922897339, "step": 2840 }, { "epoch": 1.82, "learning_rate": 2.178909952606635e-07, "logits/generated": 6.496403694152832, "logits/real": 4.948525428771973, "logps/generated": -313.8655090332031, "logps/real": -232.6029815673828, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/generated": -3.164641857147217, "rewards/margins": 5.175568103790283, "rewards/real": 2.0109262466430664, "step": 2850 }, { "epoch": 1.83, "learning_rate": 2.1670616113744074e-07, "logits/generated": 6.357646465301514, "logits/real": 4.914302825927734, "logps/generated": -304.9514465332031, "logps/real": -231.0625, "loss": 0.0739, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.8249194622039795, "rewards/margins": 4.825296878814697, "rewards/real": 2.0003771781921387, "step": 2860 }, { "epoch": 1.84, "learning_rate": 2.15521327014218e-07, "logits/generated": 6.526192665100098, "logits/real": 4.569971084594727, "logps/generated": -310.04571533203125, "logps/real": -212.1841278076172, "loss": 0.0442, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.9459242820739746, "rewards/margins": 5.625715732574463, "rewards/real": 2.6797919273376465, "step": 2870 }, { "epoch": 1.84, "learning_rate": 2.1433649289099526e-07, "logits/generated": 6.470966339111328, "logits/real": 4.952636241912842, "logps/generated": -298.78216552734375, "logps/real": -241.9241180419922, "loss": 0.0488, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.7446885108947754, "rewards/margins": 4.924825668334961, "rewards/real": 2.1801369190216064, "step": 2880 }, { "epoch": 1.85, "learning_rate": 2.131516587677725e-07, "logits/generated": 6.543872833251953, "logits/real": 5.083493232727051, "logps/generated": -328.01873779296875, "logps/real": -251.84909057617188, "loss": 0.0382, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.1929991245269775, "rewards/margins": 5.793171405792236, "rewards/real": 2.6001715660095215, "step": 2890 }, { "epoch": 1.86, "learning_rate": 2.1196682464454975e-07, "logits/generated": 6.523638725280762, "logits/real": 4.766221046447754, "logps/generated": -294.3127136230469, "logps/real": -227.9248809814453, "loss": 0.0495, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.653705596923828, "rewards/margins": 4.580477714538574, "rewards/real": 1.9267723560333252, "step": 2900 }, { "epoch": 1.86, "learning_rate": 2.10781990521327e-07, "logits/generated": 6.478121757507324, "logits/real": 5.104989528656006, "logps/generated": -296.796875, "logps/real": -241.1748809814453, "loss": 0.0434, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.8308489322662354, "rewards/margins": 4.990128517150879, "rewards/real": 2.1592795848846436, "step": 2910 }, { "epoch": 1.87, "learning_rate": 2.0959715639810427e-07, "logits/generated": 6.648934841156006, "logits/real": 4.384207725524902, "logps/generated": -314.3799133300781, "logps/real": -215.88339233398438, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/generated": -3.3432087898254395, "rewards/margins": 5.64975118637085, "rewards/real": 2.306542158126831, "step": 2920 }, { "epoch": 1.87, "learning_rate": 2.0841232227488152e-07, "logits/generated": 6.548549652099609, "logits/real": 4.6239542961120605, "logps/generated": -317.40057373046875, "logps/real": -234.81787109375, "loss": 0.0453, "rewards/accuracies": 1.0, "rewards/generated": -3.6547443866729736, "rewards/margins": 5.791055202484131, "rewards/real": 2.1363110542297363, "step": 2930 }, { "epoch": 1.88, "learning_rate": 2.0722748815165874e-07, "logits/generated": 6.573233604431152, "logits/real": 4.537630558013916, "logps/generated": -312.5951232910156, "logps/real": -227.45846557617188, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/generated": -3.44865083694458, "rewards/margins": 5.903794288635254, "rewards/real": 2.455143690109253, "step": 2940 }, { "epoch": 1.89, "learning_rate": 2.06042654028436e-07, "logits/generated": 6.5797014236450195, "logits/real": 4.640379428863525, "logps/generated": -309.2285461425781, "logps/real": -226.133056640625, "loss": 0.0369, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.0750410556793213, "rewards/margins": 5.1463847160339355, "rewards/real": 2.0713436603546143, "step": 2950 }, { "epoch": 1.89, "learning_rate": 2.0485781990521326e-07, "logits/generated": 6.535305023193359, "logits/real": 4.605700492858887, "logps/generated": -319.6959228515625, "logps/real": -225.58975219726562, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/generated": -3.13779878616333, "rewards/margins": 5.010854721069336, "rewards/real": 1.8730554580688477, "step": 2960 }, { "epoch": 1.9, "learning_rate": 2.0367298578199053e-07, "logits/generated": 6.437843322753906, "logits/real": 4.84013032913208, "logps/generated": -301.1745300292969, "logps/real": -227.8412322998047, "loss": 0.0418, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.978807210922241, "rewards/margins": 4.996744632720947, "rewards/real": 2.017937421798706, "step": 2970 }, { "epoch": 1.91, "learning_rate": 2.0248815165876775e-07, "logits/generated": 6.470362186431885, "logits/real": 5.108733654022217, "logps/generated": -319.79840087890625, "logps/real": -237.103515625, "loss": 0.0673, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.0991902351379395, "rewards/margins": 5.182190418243408, "rewards/real": 2.0830001831054688, "step": 2980 }, { "epoch": 1.91, "learning_rate": 2.0130331753554502e-07, "logits/generated": 6.5366621017456055, "logits/real": 5.0998101234436035, "logps/generated": -300.20758056640625, "logps/real": -247.12393188476562, "loss": 0.0637, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.1575188636779785, "rewards/margins": 5.251214981079102, "rewards/real": 2.093696117401123, "step": 2990 }, { "epoch": 1.92, "learning_rate": 2.0011848341232227e-07, "logits/generated": 6.446131706237793, "logits/real": 4.840705871582031, "logps/generated": -315.06439208984375, "logps/real": -226.3990478515625, "loss": 0.0472, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.494022846221924, "rewards/margins": 5.752326965332031, "rewards/real": 2.2583041191101074, "step": 3000 }, { "epoch": 1.93, "learning_rate": 1.9893364928909952e-07, "logits/generated": 6.619847297668457, "logits/real": 4.667858600616455, "logps/generated": -294.6004333496094, "logps/real": -197.28118896484375, "loss": 0.0512, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.1600637435913086, "rewards/margins": 5.338671684265137, "rewards/real": 2.1786084175109863, "step": 3010 }, { "epoch": 1.93, "learning_rate": 1.9774881516587676e-07, "logits/generated": 6.53371524810791, "logits/real": 4.700278282165527, "logps/generated": -309.0678405761719, "logps/real": -220.1807861328125, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/generated": -3.0327439308166504, "rewards/margins": 5.228504180908203, "rewards/real": 2.1957602500915527, "step": 3020 }, { "epoch": 1.94, "learning_rate": 1.96563981042654e-07, "logits/generated": 6.723300933837891, "logits/real": 4.539021968841553, "logps/generated": -309.5898132324219, "logps/real": -205.1199493408203, "loss": 0.0483, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.1201491355895996, "rewards/margins": 5.280410289764404, "rewards/real": 2.1602609157562256, "step": 3030 }, { "epoch": 1.94, "learning_rate": 1.9537914691943128e-07, "logits/generated": 6.615107536315918, "logits/real": 4.7945075035095215, "logps/generated": -340.0155944824219, "logps/real": -234.44259643554688, "loss": 0.0439, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.4857048988342285, "rewards/margins": 5.184080600738525, "rewards/real": 1.6983757019042969, "step": 3040 }, { "epoch": 1.95, "learning_rate": 1.9419431279620853e-07, "logits/generated": 6.457086086273193, "logits/real": 4.920231819152832, "logps/generated": -301.90081787109375, "logps/real": -209.1177978515625, "loss": 0.0413, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -2.7858481407165527, "rewards/margins": 4.725141525268555, "rewards/real": 1.9392932653427124, "step": 3050 }, { "epoch": 1.96, "learning_rate": 1.9300947867298577e-07, "logits/generated": 6.525505065917969, "logits/real": 4.8382768630981445, "logps/generated": -294.0171813964844, "logps/real": -215.0476531982422, "loss": 0.0385, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3542685508728027, "rewards/margins": 5.338757514953613, "rewards/real": 1.9844884872436523, "step": 3060 }, { "epoch": 1.96, "learning_rate": 1.9182464454976302e-07, "logits/generated": 6.592054843902588, "logits/real": 4.595870494842529, "logps/generated": -303.69305419921875, "logps/real": -216.79074096679688, "loss": 0.044, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.3395583629608154, "rewards/margins": 5.180264949798584, "rewards/real": 1.8407065868377686, "step": 3070 }, { "epoch": 1.97, "learning_rate": 1.906398104265403e-07, "logits/generated": 6.421343803405762, "logits/real": 4.30601167678833, "logps/generated": -316.9730529785156, "logps/real": -221.79721069335938, "loss": 0.046, "rewards/accuracies": 1.0, "rewards/generated": -3.5621120929718018, "rewards/margins": 5.808313846588135, "rewards/real": 2.246202230453491, "step": 3080 }, { "epoch": 1.98, "learning_rate": 1.8945497630331754e-07, "logits/generated": 6.610709190368652, "logits/real": 4.592480182647705, "logps/generated": -305.1477355957031, "logps/real": -200.81964111328125, "loss": 0.0469, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.413442611694336, "rewards/margins": 5.351014614105225, "rewards/real": 1.9375722408294678, "step": 3090 }, { "epoch": 1.98, "learning_rate": 1.8827014218009476e-07, "logits/generated": 6.592061519622803, "logits/real": 4.954311370849609, "logps/generated": -323.15411376953125, "logps/real": -261.54278564453125, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/generated": -3.3372626304626465, "rewards/margins": 5.153835773468018, "rewards/real": 1.816572904586792, "step": 3100 }, { "epoch": 1.99, "learning_rate": 1.8708530805687203e-07, "logits/generated": 6.648950099945068, "logits/real": 4.868198871612549, "logps/generated": -311.92987060546875, "logps/real": -221.17819213867188, "loss": 0.05, "rewards/accuracies": 1.0, "rewards/generated": -3.4177818298339844, "rewards/margins": 5.747452259063721, "rewards/real": 2.3296706676483154, "step": 3110 }, { "epoch": 2.0, "learning_rate": 1.8590047393364928e-07, "logits/generated": 6.51468563079834, "logits/real": 4.445683479309082, "logps/generated": -308.3443908691406, "logps/real": -217.1921844482422, "loss": 0.0456, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.044935941696167, "rewards/margins": 5.253106594085693, "rewards/real": 2.2081711292266846, "step": 3120 }, { "epoch": 2.0, "learning_rate": 1.8471563981042655e-07, "logits/generated": 6.549835205078125, "logits/real": 4.801936149597168, "logps/generated": -295.6900939941406, "logps/real": -213.8479766845703, "loss": 0.0543, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.1135101318359375, "rewards/margins": 5.0333404541015625, "rewards/real": 1.9198299646377563, "step": 3130 }, { "epoch": 2.01, "learning_rate": 1.8353080568720377e-07, "logits/generated": 6.434506416320801, "logits/real": 4.751941204071045, "logps/generated": -318.86431884765625, "logps/real": -244.60107421875, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/generated": -3.4822006225585938, "rewards/margins": 5.774388313293457, "rewards/real": 2.2921876907348633, "step": 3140 }, { "epoch": 2.02, "learning_rate": 1.8234597156398104e-07, "logits/generated": 6.266590118408203, "logits/real": 4.948928356170654, "logps/generated": -310.56341552734375, "logps/real": -226.72293090820312, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/generated": -3.347864866256714, "rewards/margins": 5.35115909576416, "rewards/real": 2.003293991088867, "step": 3150 }, { "epoch": 2.02, "learning_rate": 1.811611374407583e-07, "logits/generated": 6.356667518615723, "logits/real": 4.884829521179199, "logps/generated": -313.6650390625, "logps/real": -234.5040283203125, "loss": 0.0393, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3364055156707764, "rewards/margins": 5.497099876403809, "rewards/real": 2.160693645477295, "step": 3160 }, { "epoch": 2.03, "learning_rate": 1.7997630331753554e-07, "logits/generated": 6.548737525939941, "logits/real": 4.825071334838867, "logps/generated": -291.5633850097656, "logps/real": -233.65988159179688, "loss": 0.0505, "rewards/accuracies": 1.0, "rewards/generated": -3.0673575401306152, "rewards/margins": 5.524203300476074, "rewards/real": 2.456845760345459, "step": 3170 }, { "epoch": 2.03, "learning_rate": 1.7879146919431278e-07, "logits/generated": 6.497158050537109, "logits/real": 4.860651969909668, "logps/generated": -316.8197937011719, "logps/real": -223.147705078125, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/generated": -3.715630292892456, "rewards/margins": 5.779938697814941, "rewards/real": 2.064307689666748, "step": 3180 }, { "epoch": 2.04, "learning_rate": 1.7760663507109003e-07, "logits/generated": 6.6343674659729, "logits/real": 4.742542266845703, "logps/generated": -314.57635498046875, "logps/real": -218.7213897705078, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/generated": -3.649468183517456, "rewards/margins": 5.897641181945801, "rewards/real": 2.248173713684082, "step": 3190 }, { "epoch": 2.05, "learning_rate": 1.764218009478673e-07, "logits/generated": 6.572301387786865, "logits/real": 4.605647087097168, "logps/generated": -312.57159423828125, "logps/real": -218.27255249023438, "loss": 0.0499, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.4011642932891846, "rewards/margins": 5.888424396514893, "rewards/real": 2.487260341644287, "step": 3200 }, { "epoch": 2.05, "learning_rate": 1.7523696682464452e-07, "logits/generated": 6.526484489440918, "logits/real": 4.609063625335693, "logps/generated": -319.1774597167969, "logps/real": -225.0438232421875, "loss": 0.0432, "rewards/accuracies": 1.0, "rewards/generated": -3.5665206909179688, "rewards/margins": 5.571340084075928, "rewards/real": 2.004819393157959, "step": 3210 }, { "epoch": 2.06, "learning_rate": 1.740521327014218e-07, "logits/generated": 6.531280517578125, "logits/real": 4.885863780975342, "logps/generated": -315.1875915527344, "logps/real": -237.132568359375, "loss": 0.0469, "rewards/accuracies": 1.0, "rewards/generated": -3.1818227767944336, "rewards/margins": 5.084862232208252, "rewards/real": 1.9030392169952393, "step": 3220 }, { "epoch": 2.07, "learning_rate": 1.7286729857819904e-07, "logits/generated": 6.615921974182129, "logits/real": 4.542719841003418, "logps/generated": -314.04119873046875, "logps/real": -225.2353515625, "loss": 0.0352, "rewards/accuracies": 1.0, "rewards/generated": -3.191343069076538, "rewards/margins": 5.438819408416748, "rewards/real": 2.247476816177368, "step": 3230 }, { "epoch": 2.07, "learning_rate": 1.7168246445497631e-07, "logits/generated": 6.290173530578613, "logits/real": 4.586104869842529, "logps/generated": -313.9992370605469, "logps/real": -213.3672332763672, "loss": 0.0379, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.7760837078094482, "rewards/margins": 6.122511863708496, "rewards/real": 2.3464274406433105, "step": 3240 }, { "epoch": 2.08, "learning_rate": 1.7049763033175353e-07, "logits/generated": 6.58236837387085, "logits/real": 4.376986503601074, "logps/generated": -296.7496643066406, "logps/real": -215.1361083984375, "loss": 0.0364, "rewards/accuracies": 0.949999988079071, "rewards/generated": -3.280973434448242, "rewards/margins": 5.545414924621582, "rewards/real": 2.26444149017334, "step": 3250 }, { "epoch": 2.09, "learning_rate": 1.693127962085308e-07, "logits/generated": 6.613530158996582, "logits/real": 4.965182304382324, "logps/generated": -320.67425537109375, "logps/real": -241.35256958007812, "loss": 0.0375, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3830809593200684, "rewards/margins": 5.788832664489746, "rewards/real": 2.4057505130767822, "step": 3260 }, { "epoch": 2.09, "learning_rate": 1.6812796208530805e-07, "logits/generated": 6.390702724456787, "logits/real": 4.728426933288574, "logps/generated": -312.08929443359375, "logps/real": -227.0480499267578, "loss": 0.038, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.2798690795898438, "rewards/margins": 5.261677265167236, "rewards/real": 1.9818084239959717, "step": 3270 }, { "epoch": 2.1, "learning_rate": 1.669431279620853e-07, "logits/generated": 6.487574100494385, "logits/real": 4.512204647064209, "logps/generated": -321.84368896484375, "logps/real": -240.1066131591797, "loss": 0.0496, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5348918437957764, "rewards/margins": 5.897745132446289, "rewards/real": 2.3628532886505127, "step": 3280 }, { "epoch": 2.1, "learning_rate": 1.6575829383886255e-07, "logits/generated": 6.475184440612793, "logits/real": 4.735175132751465, "logps/generated": -296.4978942871094, "logps/real": -225.4010467529297, "loss": 0.053, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.1431491374969482, "rewards/margins": 5.334324359893799, "rewards/real": 2.1911754608154297, "step": 3290 }, { "epoch": 2.11, "learning_rate": 1.645734597156398e-07, "logits/generated": 6.46063756942749, "logits/real": 4.446132183074951, "logps/generated": -306.07568359375, "logps/real": -224.0647735595703, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/generated": -3.518324613571167, "rewards/margins": 5.946985721588135, "rewards/real": 2.4286608695983887, "step": 3300 }, { "epoch": 2.12, "learning_rate": 1.6338862559241706e-07, "logits/generated": 6.575872898101807, "logits/real": 4.777173042297363, "logps/generated": -312.64984130859375, "logps/real": -225.09823608398438, "loss": 0.0327, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.520355701446533, "rewards/margins": 5.975916862487793, "rewards/real": 2.4555611610412598, "step": 3310 }, { "epoch": 2.12, "learning_rate": 1.622037914691943e-07, "logits/generated": 6.580047607421875, "logits/real": 4.5617146492004395, "logps/generated": -303.8891296386719, "logps/real": -218.8231658935547, "loss": 0.0451, "rewards/accuracies": 1.0, "rewards/generated": -3.342977523803711, "rewards/margins": 5.351634502410889, "rewards/real": 2.0086567401885986, "step": 3320 }, { "epoch": 2.13, "learning_rate": 1.6101895734597156e-07, "logits/generated": 6.562408447265625, "logits/real": 5.205977916717529, "logps/generated": -321.8945617675781, "logps/real": -269.1778564453125, "loss": 0.0441, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.6907927989959717, "rewards/margins": 5.766333103179932, "rewards/real": 2.07554030418396, "step": 3330 }, { "epoch": 2.14, "learning_rate": 1.598341232227488e-07, "logits/generated": 6.638333320617676, "logits/real": 4.657790184020996, "logps/generated": -319.16754150390625, "logps/real": -232.8119354248047, "loss": 0.0498, "rewards/accuracies": 1.0, "rewards/generated": -3.34710693359375, "rewards/margins": 5.686694145202637, "rewards/real": 2.3395867347717285, "step": 3340 }, { "epoch": 2.14, "learning_rate": 1.5864928909952605e-07, "logits/generated": 6.637696743011475, "logits/real": 4.663809776306152, "logps/generated": -317.9059143066406, "logps/real": -223.9643096923828, "loss": 0.0544, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5467820167541504, "rewards/margins": 5.613127708435059, "rewards/real": 2.066345691680908, "step": 3350 }, { "epoch": 2.15, "learning_rate": 1.5746445497630332e-07, "logits/generated": 6.5797553062438965, "logits/real": 5.032973289489746, "logps/generated": -309.12310791015625, "logps/real": -231.84146118164062, "loss": 0.0434, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.374034881591797, "rewards/margins": 5.373286247253418, "rewards/real": 1.9992516040802002, "step": 3360 }, { "epoch": 2.16, "learning_rate": 1.5627962085308054e-07, "logits/generated": 6.6527228355407715, "logits/real": 4.813700199127197, "logps/generated": -314.54132080078125, "logps/real": -244.5780487060547, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/generated": -3.4785728454589844, "rewards/margins": 5.757080078125, "rewards/real": 2.2785069942474365, "step": 3370 }, { "epoch": 2.16, "learning_rate": 1.5509478672985782e-07, "logits/generated": 6.394598007202148, "logits/real": 4.875939846038818, "logps/generated": -316.898681640625, "logps/real": -229.66049194335938, "loss": 0.0356, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3502418994903564, "rewards/margins": 5.546257972717285, "rewards/real": 2.196016311645508, "step": 3380 }, { "epoch": 2.17, "learning_rate": 1.5390995260663506e-07, "logits/generated": 6.59166955947876, "logits/real": 4.965734004974365, "logps/generated": -299.8507385253906, "logps/real": -240.0013885498047, "loss": 0.0397, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.1730546951293945, "rewards/margins": 5.254295349121094, "rewards/real": 2.081240177154541, "step": 3390 }, { "epoch": 2.18, "learning_rate": 1.5272511848341233e-07, "logits/generated": 6.579958915710449, "logits/real": 4.6075544357299805, "logps/generated": -311.3309631347656, "logps/real": -231.1168975830078, "loss": 0.0377, "rewards/accuracies": 1.0, "rewards/generated": -3.3581135272979736, "rewards/margins": 5.885567665100098, "rewards/real": 2.5274531841278076, "step": 3400 }, { "epoch": 2.18, "learning_rate": 1.5154028436018955e-07, "logits/generated": 6.564838409423828, "logits/real": 4.323554515838623, "logps/generated": -314.01080322265625, "logps/real": -216.9060821533203, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/generated": -3.6513094902038574, "rewards/margins": 6.1155195236206055, "rewards/real": 2.464210033416748, "step": 3410 }, { "epoch": 2.19, "learning_rate": 1.5035545023696683e-07, "logits/generated": 6.470817565917969, "logits/real": 4.969902038574219, "logps/generated": -315.4042663574219, "logps/real": -210.75894165039062, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/generated": -3.6396255493164062, "rewards/margins": 5.780555248260498, "rewards/real": 2.140929698944092, "step": 3420 }, { "epoch": 2.19, "learning_rate": 1.4917061611374407e-07, "logits/generated": 6.28502082824707, "logits/real": 4.579121112823486, "logps/generated": -298.55078125, "logps/real": -227.2353057861328, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/generated": -3.269718647003174, "rewards/margins": 5.49901819229126, "rewards/real": 2.2292990684509277, "step": 3430 }, { "epoch": 2.2, "learning_rate": 1.4798578199052132e-07, "logits/generated": 6.529310703277588, "logits/real": 5.1802496910095215, "logps/generated": -314.0540466308594, "logps/real": -246.96493530273438, "loss": 0.0459, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.632396697998047, "rewards/margins": 5.8060173988342285, "rewards/real": 2.173621892929077, "step": 3440 }, { "epoch": 2.21, "learning_rate": 1.4680094786729857e-07, "logits/generated": 6.60665225982666, "logits/real": 4.755316257476807, "logps/generated": -315.1619873046875, "logps/real": -212.05435180664062, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/generated": -3.5212502479553223, "rewards/margins": 5.648011207580566, "rewards/real": 2.126760959625244, "step": 3450 }, { "epoch": 2.21, "learning_rate": 1.456161137440758e-07, "logits/generated": 6.501960754394531, "logits/real": 4.503037929534912, "logps/generated": -330.319580078125, "logps/real": -211.60977172851562, "loss": 0.0477, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.714958906173706, "rewards/margins": 5.880065441131592, "rewards/real": 2.1651062965393066, "step": 3460 }, { "epoch": 2.22, "learning_rate": 1.4443127962085309e-07, "logits/generated": 6.524314880371094, "logits/real": 4.682694911956787, "logps/generated": -318.777099609375, "logps/real": -249.3422088623047, "loss": 0.0234, "rewards/accuracies": 1.0, "rewards/generated": -3.846958875656128, "rewards/margins": 5.798233985900879, "rewards/real": 1.9512755870819092, "step": 3470 }, { "epoch": 2.23, "learning_rate": 1.4324644549763033e-07, "logits/generated": 6.539910793304443, "logits/real": 4.809814929962158, "logps/generated": -323.9732360839844, "logps/real": -247.67227172851562, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/generated": -3.5829575061798096, "rewards/margins": 5.772634506225586, "rewards/real": 2.1896770000457764, "step": 3480 }, { "epoch": 2.23, "learning_rate": 1.4206161137440758e-07, "logits/generated": 6.5540771484375, "logits/real": 4.512951850891113, "logps/generated": -315.17010498046875, "logps/real": -210.12588500976562, "loss": 0.0278, "rewards/accuracies": 1.0, "rewards/generated": -3.6037814617156982, "rewards/margins": 5.973625183105469, "rewards/real": 2.3698432445526123, "step": 3490 }, { "epoch": 2.24, "learning_rate": 1.4087677725118482e-07, "logits/generated": 6.513745307922363, "logits/real": 4.433924198150635, "logps/generated": -310.23089599609375, "logps/real": -203.8213348388672, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/generated": -3.4215285778045654, "rewards/margins": 5.807127952575684, "rewards/real": 2.385599374771118, "step": 3500 }, { "epoch": 2.25, "learning_rate": 1.396919431279621e-07, "logits/generated": 6.5156755447387695, "logits/real": 4.558250427246094, "logps/generated": -299.29339599609375, "logps/real": -218.6123046875, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/generated": -3.3458030223846436, "rewards/margins": 5.673041820526123, "rewards/real": 2.3272387981414795, "step": 3510 }, { "epoch": 2.25, "learning_rate": 1.3850710900473934e-07, "logits/generated": 6.494305610656738, "logits/real": 5.065830707550049, "logps/generated": -316.78814697265625, "logps/real": -231.40750122070312, "loss": 0.0381, "rewards/accuracies": 1.0, "rewards/generated": -3.5985469818115234, "rewards/margins": 5.727441310882568, "rewards/real": 2.128894805908203, "step": 3520 }, { "epoch": 2.26, "learning_rate": 1.3732227488151656e-07, "logits/generated": 6.41510009765625, "logits/real": 4.9217329025268555, "logps/generated": -318.82440185546875, "logps/real": -246.50369262695312, "loss": 0.0435, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5909526348114014, "rewards/margins": 6.108970642089844, "rewards/real": 2.5180177688598633, "step": 3530 }, { "epoch": 2.26, "learning_rate": 1.3613744075829384e-07, "logits/generated": 6.501974582672119, "logits/real": 4.702073097229004, "logps/generated": -306.13214111328125, "logps/real": -238.8610382080078, "loss": 0.0517, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.403776168823242, "rewards/margins": 5.4921417236328125, "rewards/real": 2.0883657932281494, "step": 3540 }, { "epoch": 2.27, "learning_rate": 1.3495260663507108e-07, "logits/generated": 6.479678153991699, "logits/real": 5.00525426864624, "logps/generated": -333.7792663574219, "logps/real": -235.4060516357422, "loss": 0.0294, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.638486385345459, "rewards/margins": 5.711910247802734, "rewards/real": 2.0734241008758545, "step": 3550 }, { "epoch": 2.28, "learning_rate": 1.3376777251184836e-07, "logits/generated": 6.646874904632568, "logits/real": 4.480135917663574, "logps/generated": -319.85870361328125, "logps/real": -205.182861328125, "loss": 0.0435, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5380020141601562, "rewards/margins": 5.962095737457275, "rewards/real": 2.424093723297119, "step": 3560 }, { "epoch": 2.28, "learning_rate": 1.3258293838862558e-07, "logits/generated": 6.473311424255371, "logits/real": 4.572601795196533, "logps/generated": -313.9909973144531, "logps/real": -238.57861328125, "loss": 0.0355, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.488398790359497, "rewards/margins": 5.876242637634277, "rewards/real": 2.3878438472747803, "step": 3570 }, { "epoch": 2.29, "learning_rate": 1.3139810426540285e-07, "logits/generated": 6.481254577636719, "logits/real": 4.894389629364014, "logps/generated": -322.4656677246094, "logps/real": -231.29037475585938, "loss": 0.0429, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.569429874420166, "rewards/margins": 5.297967910766602, "rewards/real": 1.7285382747650146, "step": 3580 }, { "epoch": 2.3, "learning_rate": 1.302132701421801e-07, "logits/generated": 6.44034481048584, "logits/real": 4.870292663574219, "logps/generated": -309.8015441894531, "logps/real": -232.23141479492188, "loss": 0.0466, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.57190203666687, "rewards/margins": 5.714931488037109, "rewards/real": 2.1430296897888184, "step": 3590 }, { "epoch": 2.3, "learning_rate": 1.2902843601895734e-07, "logits/generated": 6.628100395202637, "logits/real": 4.56320333480835, "logps/generated": -314.26617431640625, "logps/real": -216.93435668945312, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/generated": -3.669429302215576, "rewards/margins": 5.688117980957031, "rewards/real": 2.018688201904297, "step": 3600 }, { "epoch": 2.31, "learning_rate": 1.278436018957346e-07, "logits/generated": 6.5281829833984375, "logits/real": 4.858192443847656, "logps/generated": -314.5399169921875, "logps/real": -231.82437133789062, "loss": 0.0529, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.659435987472534, "rewards/margins": 6.2276201248168945, "rewards/real": 2.5681843757629395, "step": 3610 }, { "epoch": 2.32, "learning_rate": 1.2665876777251183e-07, "logits/generated": 6.622697353363037, "logits/real": 4.539548397064209, "logps/generated": -316.33612060546875, "logps/real": -218.0032196044922, "loss": 0.0514, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.372077465057373, "rewards/margins": 5.659914493560791, "rewards/real": 2.287837505340576, "step": 3620 }, { "epoch": 2.32, "learning_rate": 1.254739336492891e-07, "logits/generated": 6.434654235839844, "logits/real": 5.143196105957031, "logps/generated": -318.66705322265625, "logps/real": -229.20541381835938, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/generated": -3.7129311561584473, "rewards/margins": 5.720685958862305, "rewards/real": 2.007754325866699, "step": 3630 }, { "epoch": 2.33, "learning_rate": 1.2428909952606635e-07, "logits/generated": 6.507617950439453, "logits/real": 4.2293171882629395, "logps/generated": -331.07684326171875, "logps/real": -223.13516235351562, "loss": 0.0279, "rewards/accuracies": 1.0, "rewards/generated": -4.012661933898926, "rewards/margins": 6.401325225830078, "rewards/real": 2.3886632919311523, "step": 3640 }, { "epoch": 2.34, "learning_rate": 1.231042654028436e-07, "logits/generated": 6.525460243225098, "logits/real": 4.729413986206055, "logps/generated": -307.34771728515625, "logps/real": -223.11129760742188, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/generated": -3.219517230987549, "rewards/margins": 5.384081840515137, "rewards/real": 2.164564609527588, "step": 3650 }, { "epoch": 2.34, "learning_rate": 1.2191943127962085e-07, "logits/generated": 6.551595211029053, "logits/real": 4.6989874839782715, "logps/generated": -298.13812255859375, "logps/real": -219.3531036376953, "loss": 0.0519, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5793490409851074, "rewards/margins": 5.872319221496582, "rewards/real": 2.292970657348633, "step": 3660 }, { "epoch": 2.35, "learning_rate": 1.207345971563981e-07, "logits/generated": 6.519845485687256, "logits/real": 4.6157450675964355, "logps/generated": -313.74615478515625, "logps/real": -225.33602905273438, "loss": 0.044, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.348008394241333, "rewards/margins": 5.656905651092529, "rewards/real": 2.3088979721069336, "step": 3670 }, { "epoch": 2.35, "learning_rate": 1.1954976303317534e-07, "logits/generated": 6.551909446716309, "logits/real": 4.742045879364014, "logps/generated": -308.15435791015625, "logps/real": -239.033935546875, "loss": 0.0385, "rewards/accuracies": 1.0, "rewards/generated": -3.4503626823425293, "rewards/margins": 5.495227813720703, "rewards/real": 2.0448646545410156, "step": 3680 }, { "epoch": 2.36, "learning_rate": 1.183649289099526e-07, "logits/generated": 6.58068323135376, "logits/real": 4.629731178283691, "logps/generated": -316.3744812011719, "logps/real": -217.2517547607422, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/generated": -3.685664415359497, "rewards/margins": 6.379915714263916, "rewards/real": 2.6942508220672607, "step": 3690 }, { "epoch": 2.37, "learning_rate": 1.1718009478672986e-07, "logits/generated": 6.6019697189331055, "logits/real": 4.920955657958984, "logps/generated": -315.7640380859375, "logps/real": -215.07107543945312, "loss": 0.0342, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.7241485118865967, "rewards/margins": 5.824596405029297, "rewards/real": 2.1004483699798584, "step": 3700 }, { "epoch": 2.37, "learning_rate": 1.159952606635071e-07, "logits/generated": 6.5531206130981445, "logits/real": 4.487125396728516, "logps/generated": -308.80859375, "logps/real": -211.96389770507812, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/generated": -3.3599178791046143, "rewards/margins": 6.051535606384277, "rewards/real": 2.6916182041168213, "step": 3710 }, { "epoch": 2.38, "learning_rate": 1.1481042654028436e-07, "logits/generated": 6.397873878479004, "logits/real": 4.594054698944092, "logps/generated": -307.98046875, "logps/real": -209.40310668945312, "loss": 0.0471, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.6652374267578125, "rewards/margins": 6.013858795166016, "rewards/real": 2.348621129989624, "step": 3720 }, { "epoch": 2.39, "learning_rate": 1.136255924170616e-07, "logits/generated": 6.375195026397705, "logits/real": 4.987454414367676, "logps/generated": -317.755859375, "logps/real": -241.71255493164062, "loss": 0.0497, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.5185465812683105, "rewards/margins": 5.797640800476074, "rewards/real": 2.2790942192077637, "step": 3730 }, { "epoch": 2.39, "learning_rate": 1.1244075829383886e-07, "logits/generated": 6.541880130767822, "logits/real": 4.569333553314209, "logps/generated": -318.06109619140625, "logps/real": -230.28421020507812, "loss": 0.0618, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.4923720359802246, "rewards/margins": 5.879878044128418, "rewards/real": 2.3875060081481934, "step": 3740 }, { "epoch": 2.4, "learning_rate": 1.112559241706161e-07, "logits/generated": 6.615804195404053, "logits/real": 4.598508358001709, "logps/generated": -315.1165771484375, "logps/real": -230.8600616455078, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/generated": -3.722174882888794, "rewards/margins": 6.234826564788818, "rewards/real": 2.512650966644287, "step": 3750 }, { "epoch": 2.41, "learning_rate": 1.1007109004739336e-07, "logits/generated": 6.621249198913574, "logits/real": 4.556908130645752, "logps/generated": -320.9785461425781, "logps/real": -225.3008575439453, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/generated": -3.7469630241394043, "rewards/margins": 6.285904884338379, "rewards/real": 2.5389418601989746, "step": 3760 }, { "epoch": 2.41, "learning_rate": 1.0888625592417061e-07, "logits/generated": 6.390491962432861, "logits/real": 4.773979187011719, "logps/generated": -312.04071044921875, "logps/real": -249.9011688232422, "loss": 0.041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3887813091278076, "rewards/margins": 5.657594203948975, "rewards/real": 2.268812894821167, "step": 3770 }, { "epoch": 2.42, "learning_rate": 1.0770142180094787e-07, "logits/generated": 6.476538181304932, "logits/real": 4.669577598571777, "logps/generated": -312.90032958984375, "logps/real": -213.92166137695312, "loss": 0.0449, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.6199536323547363, "rewards/margins": 5.9859161376953125, "rewards/real": 2.365962505340576, "step": 3780 }, { "epoch": 2.42, "learning_rate": 1.0651658767772511e-07, "logits/generated": 6.476927757263184, "logits/real": 4.563714504241943, "logps/generated": -315.058837890625, "logps/real": -224.32577514648438, "loss": 0.049, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.7334506511688232, "rewards/margins": 6.3079400062561035, "rewards/real": 2.5744881629943848, "step": 3790 }, { "epoch": 2.43, "learning_rate": 1.0533175355450237e-07, "logits/generated": 6.539498329162598, "logits/real": 4.8311848640441895, "logps/generated": -318.9219970703125, "logps/real": -240.50894165039062, "loss": 0.0457, "rewards/accuracies": 1.0, "rewards/generated": -3.6386666297912598, "rewards/margins": 6.052326679229736, "rewards/real": 2.4136602878570557, "step": 3800 }, { "epoch": 2.44, "learning_rate": 1.0414691943127962e-07, "logits/generated": 6.435731410980225, "logits/real": 4.929614067077637, "logps/generated": -316.7015686035156, "logps/real": -229.21322631835938, "loss": 0.043, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.8294575214385986, "rewards/margins": 6.2790374755859375, "rewards/real": 2.4495797157287598, "step": 3810 }, { "epoch": 2.44, "learning_rate": 1.0296208530805687e-07, "logits/generated": 6.627385139465332, "logits/real": 4.485804557800293, "logps/generated": -309.6326599121094, "logps/real": -211.5670166015625, "loss": 0.045, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.6681857109069824, "rewards/margins": 6.135626316070557, "rewards/real": 2.467440128326416, "step": 3820 }, { "epoch": 2.45, "learning_rate": 1.0177725118483411e-07, "logits/generated": 6.6564741134643555, "logits/real": 4.68516206741333, "logps/generated": -318.8467712402344, "logps/real": -226.6106414794922, "loss": 0.051, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5700950622558594, "rewards/margins": 5.723981857299805, "rewards/real": 2.153886556625366, "step": 3830 }, { "epoch": 2.46, "learning_rate": 1.0059241706161137e-07, "logits/generated": 6.6741204261779785, "logits/real": 4.536016941070557, "logps/generated": -324.20159912109375, "logps/real": -207.59896850585938, "loss": 0.0416, "rewards/accuracies": 1.0, "rewards/generated": -3.788970470428467, "rewards/margins": 6.435559272766113, "rewards/real": 2.6465885639190674, "step": 3840 }, { "epoch": 2.46, "learning_rate": 9.940758293838862e-08, "logits/generated": 6.565242767333984, "logits/real": 4.836869239807129, "logps/generated": -303.9171447753906, "logps/real": -200.09014892578125, "loss": 0.0259, "rewards/accuracies": 1.0, "rewards/generated": -3.631394147872925, "rewards/margins": 5.958956718444824, "rewards/real": 2.327561855316162, "step": 3850 }, { "epoch": 2.47, "learning_rate": 9.822274881516588e-08, "logits/generated": 6.608918190002441, "logits/real": 4.765702724456787, "logps/generated": -311.97247314453125, "logps/real": -218.27175903320312, "loss": 0.0397, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.8027045726776123, "rewards/margins": 6.115901470184326, "rewards/real": 2.3131978511810303, "step": 3860 }, { "epoch": 2.48, "learning_rate": 9.703791469194312e-08, "logits/generated": 6.5209455490112305, "logits/real": 4.784424781799316, "logps/generated": -316.18646240234375, "logps/real": -219.3183135986328, "loss": 0.0465, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.898909330368042, "rewards/margins": 5.930804252624512, "rewards/real": 2.0318946838378906, "step": 3870 }, { "epoch": 2.48, "learning_rate": 9.585308056872038e-08, "logits/generated": 6.519847869873047, "logits/real": 4.682705879211426, "logps/generated": -316.4295349121094, "logps/real": -221.3084259033203, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/generated": -3.6345527172088623, "rewards/margins": 5.830044746398926, "rewards/real": 2.1954920291900635, "step": 3880 }, { "epoch": 2.49, "learning_rate": 9.466824644549763e-08, "logits/generated": 6.437973976135254, "logits/real": 4.786694526672363, "logps/generated": -308.895263671875, "logps/real": -247.1173095703125, "loss": 0.0299, "rewards/accuracies": 1.0, "rewards/generated": -3.685072422027588, "rewards/margins": 5.676372051239014, "rewards/real": 1.9912999868392944, "step": 3890 }, { "epoch": 2.5, "learning_rate": 9.348341232227488e-08, "logits/generated": 6.578193664550781, "logits/real": 5.0969648361206055, "logps/generated": -321.40362548828125, "logps/real": -244.7572479248047, "loss": 0.0371, "rewards/accuracies": 1.0, "rewards/generated": -3.6856868267059326, "rewards/margins": 5.648140907287598, "rewards/real": 1.9624547958374023, "step": 3900 }, { "epoch": 2.5, "learning_rate": 9.229857819905212e-08, "logits/generated": 6.4852423667907715, "logits/real": 4.911639213562012, "logps/generated": -313.97930908203125, "logps/real": -257.989013671875, "loss": 0.0382, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.478628635406494, "rewards/margins": 5.914477348327637, "rewards/real": 2.435849666595459, "step": 3910 }, { "epoch": 2.51, "learning_rate": 9.111374407582938e-08, "logits/generated": 6.461939811706543, "logits/real": 4.53496789932251, "logps/generated": -324.73724365234375, "logps/real": -236.884521484375, "loss": 0.0395, "rewards/accuracies": 1.0, "rewards/generated": -3.916443347930908, "rewards/margins": 5.9058918952941895, "rewards/real": 1.9894483089447021, "step": 3920 }, { "epoch": 2.51, "learning_rate": 8.992890995260663e-08, "logits/generated": 6.4986162185668945, "logits/real": 4.736280918121338, "logps/generated": -315.95770263671875, "logps/real": -232.65249633789062, "loss": 0.0334, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5177340507507324, "rewards/margins": 5.86563777923584, "rewards/real": 2.3479039669036865, "step": 3930 }, { "epoch": 2.52, "learning_rate": 8.874407582938389e-08, "logits/generated": 6.594623565673828, "logits/real": 4.596661567687988, "logps/generated": -294.2056579589844, "logps/real": -208.23135375976562, "loss": 0.0376, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.553370714187622, "rewards/margins": 5.874009609222412, "rewards/real": 2.3206381797790527, "step": 3940 }, { "epoch": 2.53, "learning_rate": 8.755924170616114e-08, "logits/generated": 6.560601234436035, "logits/real": 4.890211582183838, "logps/generated": -320.5434875488281, "logps/real": -234.8424835205078, "loss": 0.0322, "rewards/accuracies": 1.0, "rewards/generated": -3.717367649078369, "rewards/margins": 5.8168792724609375, "rewards/real": 2.0995113849639893, "step": 3950 }, { "epoch": 2.53, "learning_rate": 8.63744075829384e-08, "logits/generated": 6.643453121185303, "logits/real": 4.418960094451904, "logps/generated": -308.0656433105469, "logps/real": -188.0706787109375, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/generated": -3.6177737712860107, "rewards/margins": 6.433516502380371, "rewards/real": 2.8157434463500977, "step": 3960 }, { "epoch": 2.54, "learning_rate": 8.518957345971564e-08, "logits/generated": 6.609760284423828, "logits/real": 4.693791389465332, "logps/generated": -311.10589599609375, "logps/real": -222.8950653076172, "loss": 0.0359, "rewards/accuracies": 1.0, "rewards/generated": -3.7364165782928467, "rewards/margins": 6.025433540344238, "rewards/real": 2.2890164852142334, "step": 3970 }, { "epoch": 2.55, "learning_rate": 8.40047393364929e-08, "logits/generated": 6.644488334655762, "logits/real": 4.728980541229248, "logps/generated": -326.75518798828125, "logps/real": -216.3810577392578, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/generated": -3.6819024085998535, "rewards/margins": 6.162990570068359, "rewards/real": 2.4810874462127686, "step": 3980 }, { "epoch": 2.55, "learning_rate": 8.281990521327013e-08, "logits/generated": 6.576291561126709, "logits/real": 4.637971878051758, "logps/generated": -309.2288513183594, "logps/real": -236.91561889648438, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/generated": -3.5884525775909424, "rewards/margins": 5.920731544494629, "rewards/real": 2.3322794437408447, "step": 3990 }, { "epoch": 2.56, "learning_rate": 8.163507109004738e-08, "logits/generated": 6.310732841491699, "logits/real": 4.888187885284424, "logps/generated": -311.5484924316406, "logps/real": -224.40628051757812, "loss": 0.0343, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.938901901245117, "rewards/margins": 6.3101019859313965, "rewards/real": 2.3712000846862793, "step": 4000 }, { "epoch": 2.57, "learning_rate": 8.045023696682464e-08, "logits/generated": 6.325362205505371, "logits/real": 5.112117767333984, "logps/generated": -311.65325927734375, "logps/real": -254.33285522460938, "loss": 0.0511, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.4007866382598877, "rewards/margins": 5.50778865814209, "rewards/real": 2.1070024967193604, "step": 4010 }, { "epoch": 2.57, "learning_rate": 7.926540284360189e-08, "logits/generated": 6.5289106369018555, "logits/real": 4.755041599273682, "logps/generated": -333.82012939453125, "logps/real": -237.4073486328125, "loss": 0.0326, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.6312661170959473, "rewards/margins": 6.053757667541504, "rewards/real": 2.4224915504455566, "step": 4020 }, { "epoch": 2.58, "learning_rate": 7.808056872037915e-08, "logits/generated": 6.4816789627075195, "logits/real": 4.328751087188721, "logps/generated": -322.64190673828125, "logps/real": -216.17178344726562, "loss": 0.0482, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.9149162769317627, "rewards/margins": 6.450268745422363, "rewards/real": 2.5353522300720215, "step": 4030 }, { "epoch": 2.58, "learning_rate": 7.689573459715639e-08, "logits/generated": 6.6160569190979, "logits/real": 4.253937244415283, "logps/generated": -321.759765625, "logps/real": -202.4170379638672, "loss": 0.0239, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.693296432495117, "rewards/margins": 5.80830192565918, "rewards/real": 2.1150054931640625, "step": 4040 }, { "epoch": 2.59, "learning_rate": 7.571090047393365e-08, "logits/generated": 6.380744934082031, "logits/real": 4.77320671081543, "logps/generated": -306.7076110839844, "logps/real": -220.3052215576172, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/generated": -3.4694228172302246, "rewards/margins": 5.889524936676025, "rewards/real": 2.42010235786438, "step": 4050 }, { "epoch": 2.6, "learning_rate": 7.45260663507109e-08, "logits/generated": 6.456230163574219, "logits/real": 4.660184860229492, "logps/generated": -309.7335510253906, "logps/real": -223.43399047851562, "loss": 0.0355, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.4624855518341064, "rewards/margins": 5.547595500946045, "rewards/real": 2.0851101875305176, "step": 4060 }, { "epoch": 2.6, "learning_rate": 7.334123222748814e-08, "logits/generated": 6.5137529373168945, "logits/real": 4.8226494789123535, "logps/generated": -320.96771240234375, "logps/real": -244.80807495117188, "loss": 0.0315, "rewards/accuracies": 1.0, "rewards/generated": -3.9199230670928955, "rewards/margins": 6.18032693862915, "rewards/real": 2.2604031562805176, "step": 4070 }, { "epoch": 2.61, "learning_rate": 7.215639810426539e-08, "logits/generated": 6.472892761230469, "logits/real": 4.793478012084961, "logps/generated": -321.29473876953125, "logps/real": -232.20382690429688, "loss": 0.0445, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.531625270843506, "rewards/margins": 6.006019592285156, "rewards/real": 2.4743950366973877, "step": 4080 }, { "epoch": 2.62, "learning_rate": 7.097156398104265e-08, "logits/generated": 6.526330471038818, "logits/real": 4.876141548156738, "logps/generated": -308.0903625488281, "logps/real": -241.17123413085938, "loss": 0.0497, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.8017773628234863, "rewards/margins": 6.233469486236572, "rewards/real": 2.431692123413086, "step": 4090 }, { "epoch": 2.62, "learning_rate": 6.97867298578199e-08, "logits/generated": 6.507188320159912, "logits/real": 4.934351444244385, "logps/generated": -313.9234619140625, "logps/real": -229.16958618164062, "loss": 0.0412, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.8360989093780518, "rewards/margins": 5.914695739746094, "rewards/real": 2.0785973072052, "step": 4100 }, { "epoch": 2.63, "learning_rate": 6.860189573459716e-08, "logits/generated": 6.5582594871521, "logits/real": 4.455166339874268, "logps/generated": -316.6048583984375, "logps/real": -216.8268280029297, "loss": 0.0314, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.8321373462677, "rewards/margins": 6.344393730163574, "rewards/real": 2.512256383895874, "step": 4110 }, { "epoch": 2.64, "learning_rate": 6.74170616113744e-08, "logits/generated": 6.617920875549316, "logits/real": 4.433808326721191, "logps/generated": -310.179931640625, "logps/real": -225.44900512695312, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/generated": -3.6572768688201904, "rewards/margins": 6.027648448944092, "rewards/real": 2.3703715801239014, "step": 4120 }, { "epoch": 2.64, "learning_rate": 6.623222748815166e-08, "logits/generated": 6.532803535461426, "logits/real": 4.4606032371521, "logps/generated": -305.09393310546875, "logps/real": -212.5522918701172, "loss": 0.0441, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.38942289352417, "rewards/margins": 5.628909587860107, "rewards/real": 2.2394864559173584, "step": 4130 }, { "epoch": 2.65, "learning_rate": 6.504739336492891e-08, "logits/generated": 6.532896995544434, "logits/real": 4.811502456665039, "logps/generated": -321.75567626953125, "logps/real": -236.20590209960938, "loss": 0.0334, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.9196619987487793, "rewards/margins": 6.389462471008301, "rewards/real": 2.4698009490966797, "step": 4140 }, { "epoch": 2.66, "learning_rate": 6.386255924170615e-08, "logits/generated": 6.573578834533691, "logits/real": 4.610198497772217, "logps/generated": -330.74285888671875, "logps/real": -208.0010986328125, "loss": 0.0436, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.901912212371826, "rewards/margins": 6.33354377746582, "rewards/real": 2.431631326675415, "step": 4150 }, { "epoch": 2.66, "learning_rate": 6.26777251184834e-08, "logits/generated": 6.6127519607543945, "logits/real": 4.326380252838135, "logps/generated": -313.0176696777344, "logps/real": -204.0683135986328, "loss": 0.0283, "rewards/accuracies": 1.0, "rewards/generated": -3.7593460083007812, "rewards/margins": 6.059396266937256, "rewards/real": 2.3000504970550537, "step": 4160 }, { "epoch": 2.67, "learning_rate": 6.149289099526066e-08, "logits/generated": 6.535134792327881, "logits/real": 4.897824287414551, "logps/generated": -308.34429931640625, "logps/real": -220.6827392578125, "loss": 0.0532, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.668942928314209, "rewards/margins": 5.9941911697387695, "rewards/real": 2.325247287750244, "step": 4170 }, { "epoch": 2.67, "learning_rate": 6.030805687203791e-08, "logits/generated": 6.5091376304626465, "logits/real": 4.820844650268555, "logps/generated": -324.505859375, "logps/real": -224.4438018798828, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/generated": -3.988135814666748, "rewards/margins": 6.7851080894470215, "rewards/real": 2.7969725131988525, "step": 4180 }, { "epoch": 2.68, "learning_rate": 5.912322274881516e-08, "logits/generated": 6.506842136383057, "logits/real": 4.628279685974121, "logps/generated": -311.8347473144531, "logps/real": -239.9702606201172, "loss": 0.0313, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3794751167297363, "rewards/margins": 5.759786605834961, "rewards/real": 2.380312204360962, "step": 4190 }, { "epoch": 2.69, "learning_rate": 5.793838862559241e-08, "logits/generated": 6.515402793884277, "logits/real": 4.95352840423584, "logps/generated": -318.76495361328125, "logps/real": -234.7864227294922, "loss": 0.0425, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.8171284198760986, "rewards/margins": 6.229681968688965, "rewards/real": 2.412553071975708, "step": 4200 }, { "epoch": 2.69, "learning_rate": 5.6753554502369666e-08, "logits/generated": 6.587221622467041, "logits/real": 4.416947364807129, "logps/generated": -324.5755310058594, "logps/real": -213.75076293945312, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/generated": -3.916365146636963, "rewards/margins": 6.1400957107543945, "rewards/real": 2.22373104095459, "step": 4210 }, { "epoch": 2.7, "learning_rate": 5.556872037914691e-08, "logits/generated": 6.564489841461182, "logits/real": 4.871767997741699, "logps/generated": -308.17706298828125, "logps/real": -221.3224639892578, "loss": 0.0365, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.5159783363342285, "rewards/margins": 5.63589334487915, "rewards/real": 2.11991548538208, "step": 4220 }, { "epoch": 2.71, "learning_rate": 5.4383886255924165e-08, "logits/generated": 6.402759552001953, "logits/real": 4.591031074523926, "logps/generated": -328.0219421386719, "logps/real": -236.8424072265625, "loss": 0.0356, "rewards/accuracies": 1.0, "rewards/generated": -3.7980494499206543, "rewards/margins": 6.346069812774658, "rewards/real": 2.5480198860168457, "step": 4230 }, { "epoch": 2.71, "learning_rate": 5.319905213270142e-08, "logits/generated": 6.591435432434082, "logits/real": 4.606555938720703, "logps/generated": -303.12579345703125, "logps/real": -228.23226928710938, "loss": 0.0447, "rewards/accuracies": 1.0, "rewards/generated": -3.5328078269958496, "rewards/margins": 6.194054126739502, "rewards/real": 2.6612462997436523, "step": 4240 }, { "epoch": 2.72, "learning_rate": 5.201421800947867e-08, "logits/generated": 6.483080863952637, "logits/real": 4.758042335510254, "logps/generated": -307.1785583496094, "logps/real": -221.1710662841797, "loss": 0.0277, "rewards/accuracies": 1.0, "rewards/generated": -3.7217071056365967, "rewards/margins": 6.27499532699585, "rewards/real": 2.553287982940674, "step": 4250 }, { "epoch": 2.73, "learning_rate": 5.082938388625592e-08, "logits/generated": 6.502976417541504, "logits/real": 4.846578121185303, "logps/generated": -317.02081298828125, "logps/real": -225.8799591064453, "loss": 0.037, "rewards/accuracies": 1.0, "rewards/generated": -3.6737067699432373, "rewards/margins": 5.691414833068848, "rewards/real": 2.0177078247070312, "step": 4260 }, { "epoch": 2.73, "learning_rate": 4.964454976303317e-08, "logits/generated": 6.714502811431885, "logits/real": 4.4899725914001465, "logps/generated": -320.1687316894531, "logps/real": -220.10842895507812, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/generated": -3.954611301422119, "rewards/margins": 6.4107770919799805, "rewards/real": 2.4561660289764404, "step": 4270 }, { "epoch": 2.74, "learning_rate": 4.845971563981042e-08, "logits/generated": 6.47867488861084, "logits/real": 4.510740756988525, "logps/generated": -313.6661682128906, "logps/real": -206.0166015625, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/generated": -3.6164774894714355, "rewards/margins": 5.649336814880371, "rewards/real": 2.0328593254089355, "step": 4280 }, { "epoch": 2.74, "learning_rate": 4.7274881516587676e-08, "logits/generated": 6.634936332702637, "logits/real": 4.846175670623779, "logps/generated": -327.55364990234375, "logps/real": -215.91610717773438, "loss": 0.0275, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.14184045791626, "rewards/margins": 6.476358890533447, "rewards/real": 2.3345181941986084, "step": 4290 }, { "epoch": 2.75, "learning_rate": 4.609004739336492e-08, "logits/generated": 6.581854820251465, "logits/real": 4.857382774353027, "logps/generated": -305.4502258300781, "logps/real": -209.5597381591797, "loss": 0.0448, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.655374526977539, "rewards/margins": 6.246061325073242, "rewards/real": 2.5906870365142822, "step": 4300 }, { "epoch": 2.76, "learning_rate": 4.4905213270142176e-08, "logits/generated": 6.5722198486328125, "logits/real": 4.7393646240234375, "logps/generated": -326.43719482421875, "logps/real": -233.70809936523438, "loss": 0.0363, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.8187355995178223, "rewards/margins": 6.1232428550720215, "rewards/real": 2.3045077323913574, "step": 4310 }, { "epoch": 2.76, "learning_rate": 4.372037914691943e-08, "logits/generated": 6.555207252502441, "logits/real": 4.758434295654297, "logps/generated": -326.94232177734375, "logps/real": -235.101806640625, "loss": 0.025, "rewards/accuracies": 1.0, "rewards/generated": -3.6171951293945312, "rewards/margins": 6.253279685974121, "rewards/real": 2.636084794998169, "step": 4320 }, { "epoch": 2.77, "learning_rate": 4.253554502369668e-08, "logits/generated": 6.525106906890869, "logits/real": 5.279546737670898, "logps/generated": -331.38818359375, "logps/real": -274.1283264160156, "loss": 0.0272, "rewards/accuracies": 1.0, "rewards/generated": -3.6407923698425293, "rewards/margins": 6.059579372406006, "rewards/real": 2.4187867641448975, "step": 4330 }, { "epoch": 2.78, "learning_rate": 4.135071090047393e-08, "logits/generated": 6.604770660400391, "logits/real": 4.898898601531982, "logps/generated": -330.6890563964844, "logps/real": -254.55038452148438, "loss": 0.0362, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.8300297260284424, "rewards/margins": 6.490457057952881, "rewards/real": 2.6604273319244385, "step": 4340 }, { "epoch": 2.78, "learning_rate": 4.016587677725118e-08, "logits/generated": 6.4878034591674805, "logits/real": 4.653387069702148, "logps/generated": -306.11895751953125, "logps/real": -233.1724395751953, "loss": 0.0334, "rewards/accuracies": 1.0, "rewards/generated": -3.7691116333007812, "rewards/margins": 6.210760593414307, "rewards/real": 2.441648006439209, "step": 4350 }, { "epoch": 2.79, "learning_rate": 3.8981042654028434e-08, "logits/generated": 6.604249477386475, "logits/real": 4.4642558097839355, "logps/generated": -328.152099609375, "logps/real": -215.21151733398438, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/generated": -3.84818696975708, "rewards/margins": 6.3337178230285645, "rewards/real": 2.485531806945801, "step": 4360 }, { "epoch": 2.8, "learning_rate": 3.779620853080569e-08, "logits/generated": 6.567579746246338, "logits/real": 4.3342695236206055, "logps/generated": -319.4460144042969, "logps/real": -200.232177734375, "loss": 0.0331, "rewards/accuracies": 1.0, "rewards/generated": -3.974278211593628, "rewards/margins": 6.716505527496338, "rewards/real": 2.7422266006469727, "step": 4370 }, { "epoch": 2.8, "learning_rate": 3.661137440758294e-08, "logits/generated": 6.469827175140381, "logits/real": 4.755372047424316, "logps/generated": -321.1123046875, "logps/real": -220.9445343017578, "loss": 0.0342, "rewards/accuracies": 1.0, "rewards/generated": -3.819784641265869, "rewards/margins": 6.050178050994873, "rewards/real": 2.230393171310425, "step": 4380 }, { "epoch": 2.81, "learning_rate": 3.5426540284360186e-08, "logits/generated": 6.43978214263916, "logits/real": 5.01400899887085, "logps/generated": -317.6650390625, "logps/real": -221.50173950195312, "loss": 0.0359, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.7374751567840576, "rewards/margins": 6.025723934173584, "rewards/real": 2.2882485389709473, "step": 4390 }, { "epoch": 2.82, "learning_rate": 3.424170616113744e-08, "logits/generated": 6.483642578125, "logits/real": 4.6836934089660645, "logps/generated": -328.49017333984375, "logps/real": -233.32470703125, "loss": 0.0285, "rewards/accuracies": 1.0, "rewards/generated": -4.040391445159912, "rewards/margins": 6.6246747970581055, "rewards/real": 2.5842835903167725, "step": 4400 }, { "epoch": 2.82, "learning_rate": 3.305687203791469e-08, "logits/generated": 6.557009220123291, "logits/real": 4.679731369018555, "logps/generated": -314.7176513671875, "logps/real": -220.220458984375, "loss": 0.0218, "rewards/accuracies": 1.0, "rewards/generated": -3.6416420936584473, "rewards/margins": 6.076503753662109, "rewards/real": 2.4348621368408203, "step": 4410 }, { "epoch": 2.83, "learning_rate": 3.1872037914691945e-08, "logits/generated": 6.50725793838501, "logits/real": 4.735629558563232, "logps/generated": -319.73260498046875, "logps/real": -205.85287475585938, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/generated": -3.5968334674835205, "rewards/margins": 5.893436908721924, "rewards/real": 2.2966036796569824, "step": 4420 }, { "epoch": 2.83, "learning_rate": 3.068720379146919e-08, "logits/generated": 6.5144171714782715, "logits/real": 4.513826847076416, "logps/generated": -318.6091613769531, "logps/real": -218.3349151611328, "loss": 0.041, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.7594285011291504, "rewards/margins": 6.054383754730225, "rewards/real": 2.2949557304382324, "step": 4430 }, { "epoch": 2.84, "learning_rate": 2.9502369668246444e-08, "logits/generated": 6.47725772857666, "logits/real": 4.8379034996032715, "logps/generated": -318.28778076171875, "logps/real": -225.16415405273438, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/generated": -3.90997052192688, "rewards/margins": 6.520627021789551, "rewards/real": 2.6106560230255127, "step": 4440 }, { "epoch": 2.85, "learning_rate": 2.8317535545023697e-08, "logits/generated": 6.458238124847412, "logits/real": 4.9540228843688965, "logps/generated": -315.47100830078125, "logps/real": -216.9081268310547, "loss": 0.0361, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.684654712677002, "rewards/margins": 6.162962436676025, "rewards/real": 2.4783077239990234, "step": 4450 }, { "epoch": 2.85, "learning_rate": 2.7132701421800947e-08, "logits/generated": 6.424310207366943, "logits/real": 4.891389846801758, "logps/generated": -316.9339904785156, "logps/real": -235.1954345703125, "loss": 0.0367, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.7844338417053223, "rewards/margins": 6.09124755859375, "rewards/real": 2.306814193725586, "step": 4460 }, { "epoch": 2.86, "learning_rate": 2.59478672985782e-08, "logits/generated": 6.582629203796387, "logits/real": 4.89013671875, "logps/generated": -328.97601318359375, "logps/real": -229.2420654296875, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/generated": -3.919386386871338, "rewards/margins": 6.390198707580566, "rewards/real": 2.4708125591278076, "step": 4470 }, { "epoch": 2.87, "learning_rate": 2.476303317535545e-08, "logits/generated": 6.474552154541016, "logits/real": 4.8046979904174805, "logps/generated": -309.959716796875, "logps/real": -234.17251586914062, "loss": 0.0466, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -3.949699878692627, "rewards/margins": 6.123934745788574, "rewards/real": 2.1742348670959473, "step": 4480 }, { "epoch": 2.87, "learning_rate": 2.3578199052132702e-08, "logits/generated": 6.575322151184082, "logits/real": 4.4408369064331055, "logps/generated": -315.2879333496094, "logps/real": -226.00564575195312, "loss": 0.0441, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.301480770111084, "rewards/margins": 5.636345863342285, "rewards/real": 2.3348641395568848, "step": 4490 }, { "epoch": 2.88, "learning_rate": 2.239336492890995e-08, "logits/generated": 6.517224311828613, "logits/real": 4.958826065063477, "logps/generated": -314.48382568359375, "logps/real": -232.69461059570312, "loss": 0.0376, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.824894428253174, "rewards/margins": 6.043306827545166, "rewards/real": 2.2184131145477295, "step": 4500 }, { "epoch": 2.89, "learning_rate": 2.1208530805687202e-08, "logits/generated": 6.625432014465332, "logits/real": 4.315027713775635, "logps/generated": -319.8591003417969, "logps/real": -203.87966918945312, "loss": 0.0411, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.930284023284912, "rewards/margins": 6.055028915405273, "rewards/real": 2.1247451305389404, "step": 4510 }, { "epoch": 2.89, "learning_rate": 2.002369668246445e-08, "logits/generated": 6.50540828704834, "logits/real": 5.131866455078125, "logps/generated": -318.6473693847656, "logps/real": -250.11904907226562, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/generated": -3.9697766304016113, "rewards/margins": 5.962163925170898, "rewards/real": 1.9923874139785767, "step": 4520 }, { "epoch": 2.9, "learning_rate": 1.8838862559241704e-08, "logits/generated": 6.527606964111328, "logits/real": 4.929044723510742, "logps/generated": -307.16680908203125, "logps/real": -220.23678588867188, "loss": 0.0262, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.7975895404815674, "rewards/margins": 6.474527835845947, "rewards/real": 2.67693829536438, "step": 4530 }, { "epoch": 2.9, "learning_rate": 1.7654028436018954e-08, "logits/generated": 6.473954677581787, "logits/real": 4.672419548034668, "logps/generated": -316.3739013671875, "logps/real": -218.72024536132812, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/generated": -3.7095909118652344, "rewards/margins": 6.246824741363525, "rewards/real": 2.537233829498291, "step": 4540 }, { "epoch": 2.91, "learning_rate": 1.6469194312796207e-08, "logits/generated": 6.356590747833252, "logits/real": 5.3553056716918945, "logps/generated": -313.9323425292969, "logps/real": -238.56777954101562, "loss": 0.0336, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.027235984802246, "rewards/margins": 6.5537543296813965, "rewards/real": 2.5265183448791504, "step": 4550 }, { "epoch": 2.92, "learning_rate": 1.528436018957346e-08, "logits/generated": 6.637836456298828, "logits/real": 4.594275951385498, "logps/generated": -310.762939453125, "logps/real": -227.982177734375, "loss": 0.0364, "rewards/accuracies": 1.0, "rewards/generated": -3.674046754837036, "rewards/margins": 6.259788990020752, "rewards/real": 2.585742473602295, "step": 4560 }, { "epoch": 2.92, "learning_rate": 1.409952606635071e-08, "logits/generated": 6.587820529937744, "logits/real": 4.828711032867432, "logps/generated": -322.9399719238281, "logps/real": -231.203857421875, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/generated": -3.4187331199645996, "rewards/margins": 5.429445266723633, "rewards/real": 2.0107123851776123, "step": 4570 }, { "epoch": 2.93, "learning_rate": 1.2914691943127961e-08, "logits/generated": 6.576046943664551, "logits/real": 4.6913862228393555, "logps/generated": -310.24871826171875, "logps/real": -215.4926300048828, "loss": 0.0441, "rewards/accuracies": 1.0, "rewards/generated": -3.60530424118042, "rewards/margins": 5.837168216705322, "rewards/real": 2.2318644523620605, "step": 4580 }, { "epoch": 2.94, "learning_rate": 1.1729857819905212e-08, "logits/generated": 6.390922546386719, "logits/real": 5.047102451324463, "logps/generated": -316.705322265625, "logps/real": -221.8660125732422, "loss": 0.0365, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.7050914764404297, "rewards/margins": 6.110095500946045, "rewards/real": 2.4050049781799316, "step": 4590 }, { "epoch": 2.94, "learning_rate": 1.0545023696682464e-08, "logits/generated": 6.420653343200684, "logits/real": 4.74511194229126, "logps/generated": -317.0543212890625, "logps/real": -218.3509979248047, "loss": 0.0397, "rewards/accuracies": 1.0, "rewards/generated": -3.9743943214416504, "rewards/margins": 6.493072509765625, "rewards/real": 2.5186777114868164, "step": 4600 }, { "epoch": 2.95, "learning_rate": 9.360189573459715e-09, "logits/generated": 6.5571417808532715, "logits/real": 4.7767133712768555, "logps/generated": -309.4183349609375, "logps/real": -237.371826171875, "loss": 0.0329, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3121707439422607, "rewards/margins": 5.517508506774902, "rewards/real": 2.2053380012512207, "step": 4610 }, { "epoch": 2.96, "learning_rate": 8.175355450236966e-09, "logits/generated": 6.605482578277588, "logits/real": 4.707463264465332, "logps/generated": -327.2000427246094, "logps/real": -206.5057373046875, "loss": 0.0279, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.9358227252960205, "rewards/margins": 6.49956750869751, "rewards/real": 2.56374454498291, "step": 4620 }, { "epoch": 2.96, "learning_rate": 6.990521327014218e-09, "logits/generated": 6.495814323425293, "logits/real": 4.9706573486328125, "logps/generated": -296.98992919921875, "logps/real": -226.7158966064453, "loss": 0.0287, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.4257378578186035, "rewards/margins": 5.643680572509766, "rewards/real": 2.217942714691162, "step": 4630 }, { "epoch": 2.97, "learning_rate": 5.805687203791469e-09, "logits/generated": 6.507977485656738, "logits/real": 4.830941200256348, "logps/generated": -309.84075927734375, "logps/real": -210.8922119140625, "loss": 0.0384, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.37202525138855, "rewards/margins": 5.505019187927246, "rewards/real": 2.1329941749572754, "step": 4640 }, { "epoch": 2.98, "learning_rate": 4.62085308056872e-09, "logits/generated": 6.521729469299316, "logits/real": 4.934253215789795, "logps/generated": -309.5820007324219, "logps/real": -222.89987182617188, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/generated": -3.6575589179992676, "rewards/margins": 5.68410062789917, "rewards/real": 2.0265424251556396, "step": 4650 }, { "epoch": 2.98, "learning_rate": 3.4360189573459714e-09, "logits/generated": 6.571385860443115, "logits/real": 4.746085166931152, "logps/generated": -322.11639404296875, "logps/real": -239.91476440429688, "loss": 0.0411, "rewards/accuracies": 1.0, "rewards/generated": -3.9839770793914795, "rewards/margins": 6.489884376525879, "rewards/real": 2.5059072971343994, "step": 4660 }, { "epoch": 2.99, "learning_rate": 2.2511848341232227e-09, "logits/generated": 6.565645694732666, "logits/real": 4.96927547454834, "logps/generated": -318.505859375, "logps/real": -256.3047180175781, "loss": 0.0367, "rewards/accuracies": 1.0, "rewards/generated": -3.7661774158477783, "rewards/margins": 5.747965335845947, "rewards/real": 1.981787919998169, "step": 4670 }, { "epoch": 2.99, "learning_rate": 1.0663507109004738e-09, "logits/generated": 6.504978179931641, "logits/real": 4.623128414154053, "logps/generated": -328.41619873046875, "logps/real": -214.21871948242188, "loss": 0.0385, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.7540199756622314, "rewards/margins": 5.994838237762451, "rewards/real": 2.2408182621002197, "step": 4680 }, { "epoch": 3.0, "step": 4689, "total_flos": 0.0, "train_loss": 0.16397903456657928, "train_runtime": 29965.1509, "train_samples_per_second": 5.006, "train_steps_per_second": 0.156 } ], "logging_steps": 10, "max_steps": 4689, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }