|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 4689, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.066098081023454e-09, |
|
"logits/generated": 6.313449382781982, |
|
"logits/real": 4.503366947174072, |
|
"logps/generated": -273.765380859375, |
|
"logps/real": -221.5892333984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0660980810234541e-08, |
|
"logits/generated": 6.417145729064941, |
|
"logits/real": 4.833721160888672, |
|
"logps/generated": -281.4638366699219, |
|
"logps/real": -251.9319610595703, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.0416666679084301, |
|
"rewards/generated": 0.0008282132912427187, |
|
"rewards/margins": -0.0019732369109988213, |
|
"rewards/real": -0.0011450237361714244, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.1321961620469082e-08, |
|
"logits/generated": 6.508301734924316, |
|
"logits/real": 4.467240333557129, |
|
"logps/generated": -281.99346923828125, |
|
"logps/real": -244.68807983398438, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/generated": -0.01464166771620512, |
|
"rewards/margins": 0.011996939778327942, |
|
"rewards/real": -0.0026447249110788107, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.1982942430703625e-08, |
|
"logits/generated": 6.402890682220459, |
|
"logits/real": 4.9113664627075195, |
|
"logps/generated": -280.8014831542969, |
|
"logps/real": -265.4349365234375, |
|
"loss": 0.6959, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/generated": 0.0067442902363836765, |
|
"rewards/margins": 0.00477126520127058, |
|
"rewards/real": 0.011515555903315544, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.2643923240938164e-08, |
|
"logits/generated": 6.590306758880615, |
|
"logits/real": 4.578217029571533, |
|
"logps/generated": -270.75592041015625, |
|
"logps/real": -240.4984130859375, |
|
"loss": 0.7, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/generated": 0.0005118753761053085, |
|
"rewards/margins": -0.0064646475948393345, |
|
"rewards/real": -0.005952772684395313, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.3304904051172704e-08, |
|
"logits/generated": 6.504673957824707, |
|
"logits/real": 4.641873359680176, |
|
"logps/generated": -276.2430419921875, |
|
"logps/real": -224.84249877929688, |
|
"loss": 0.6889, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/generated": -0.033875368535518646, |
|
"rewards/margins": 0.02717522345483303, |
|
"rewards/real": -0.006700146943330765, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.396588486140725e-08, |
|
"logits/generated": 6.473546504974365, |
|
"logits/real": 5.055091381072998, |
|
"logps/generated": -289.09674072265625, |
|
"logps/real": -243.2639923095703, |
|
"loss": 0.6977, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/generated": -0.015207557007670403, |
|
"rewards/margins": 0.020673025399446487, |
|
"rewards/real": 0.00546546746045351, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.462686567164178e-08, |
|
"logits/generated": 6.487510681152344, |
|
"logits/real": 4.849614143371582, |
|
"logps/generated": -283.6287536621094, |
|
"logps/real": -268.0138854980469, |
|
"loss": 0.6988, |
|
"rewards/accuracies": 0.5, |
|
"rewards/generated": -0.020940685644745827, |
|
"rewards/margins": 0.0006496586138382554, |
|
"rewards/real": -0.020291026681661606, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.528784648187633e-08, |
|
"logits/generated": 6.551278114318848, |
|
"logits/real": 4.523703575134277, |
|
"logps/generated": -283.064697265625, |
|
"logps/real": -237.7630157470703, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/generated": -0.003950852435082197, |
|
"rewards/margins": 0.003412533551454544, |
|
"rewards/real": -0.0005383208626881242, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.594882729211087e-08, |
|
"logits/generated": 6.489449501037598, |
|
"logits/real": 4.616640090942383, |
|
"logps/generated": -294.86614990234375, |
|
"logps/real": -257.0780029296875, |
|
"loss": 0.6965, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/generated": -0.001603059470653534, |
|
"rewards/margins": -0.0017856752965599298, |
|
"rewards/real": -0.0033887363970279694, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0660980810234541e-07, |
|
"logits/generated": 6.500540256500244, |
|
"logits/real": 4.6778364181518555, |
|
"logps/generated": -280.27447509765625, |
|
"logps/real": -240.3990020751953, |
|
"loss": 0.6985, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/generated": 0.01377450954169035, |
|
"rewards/margins": -0.026643192395567894, |
|
"rewards/real": -0.01286868192255497, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.1727078891257995e-07, |
|
"logits/generated": 6.49435567855835, |
|
"logits/real": 4.850351810455322, |
|
"logps/generated": -284.07623291015625, |
|
"logps/real": -256.1934814453125, |
|
"loss": 0.699, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/generated": 0.002641477854922414, |
|
"rewards/margins": 0.004401583690196276, |
|
"rewards/real": 0.007043060846626759, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.279317697228145e-07, |
|
"logits/generated": 6.4412078857421875, |
|
"logits/real": 4.881103038787842, |
|
"logps/generated": -281.96685791015625, |
|
"logps/real": -240.1349334716797, |
|
"loss": 0.7007, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/generated": -0.010973912663757801, |
|
"rewards/margins": -0.00332402135245502, |
|
"rewards/real": -0.014297932386398315, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3859275053304903e-07, |
|
"logits/generated": 6.572972297668457, |
|
"logits/real": 4.993831634521484, |
|
"logps/generated": -278.8970031738281, |
|
"logps/real": -253.2180633544922, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/generated": -0.0238101277500391, |
|
"rewards/margins": 0.015025329776108265, |
|
"rewards/real": -0.00878479890525341, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.4925373134328355e-07, |
|
"logits/generated": 6.496808052062988, |
|
"logits/real": 4.527266979217529, |
|
"logps/generated": -286.42230224609375, |
|
"logps/real": -238.97653198242188, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/generated": -0.015367841348052025, |
|
"rewards/margins": 0.015413427725434303, |
|
"rewards/real": 4.558637738227844e-05, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.5991471215351813e-07, |
|
"logits/generated": 6.386403560638428, |
|
"logits/real": 5.1331682205200195, |
|
"logps/generated": -288.5858459472656, |
|
"logps/real": -252.1357879638672, |
|
"loss": 0.6973, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/generated": 0.001897630630992353, |
|
"rewards/margins": -0.013987990096211433, |
|
"rewards/real": -0.012090359814465046, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7057569296375266e-07, |
|
"logits/generated": 6.469916343688965, |
|
"logits/real": 4.803389549255371, |
|
"logps/generated": -279.75665283203125, |
|
"logps/real": -249.20278930664062, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/generated": -0.00042929648770950735, |
|
"rewards/margins": -0.006332563702017069, |
|
"rewards/real": -0.0067618610337376595, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8123667377398718e-07, |
|
"logits/generated": 6.469520568847656, |
|
"logits/real": 4.792954444885254, |
|
"logps/generated": -278.6644592285156, |
|
"logps/real": -238.6741485595703, |
|
"loss": 0.694, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/generated": -0.006913432385772467, |
|
"rewards/margins": 0.017923034727573395, |
|
"rewards/real": 0.01100960187613964, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9189765458422174e-07, |
|
"logits/generated": 6.304480075836182, |
|
"logits/real": 5.01064395904541, |
|
"logps/generated": -292.3460388183594, |
|
"logps/real": -263.7362976074219, |
|
"loss": 0.7018, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/generated": 0.00147080363240093, |
|
"rewards/margins": -0.01612243428826332, |
|
"rewards/real": -0.014651629142463207, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.025586353944563e-07, |
|
"logits/generated": 6.497210502624512, |
|
"logits/real": 4.78942346572876, |
|
"logps/generated": -282.570068359375, |
|
"logps/real": -263.93743896484375, |
|
"loss": 0.6982, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/generated": 0.003275098744779825, |
|
"rewards/margins": -0.005347794853150845, |
|
"rewards/real": -0.0020726968068629503, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.1321961620469082e-07, |
|
"logits/generated": 6.557346343994141, |
|
"logits/real": 4.690012454986572, |
|
"logps/generated": -289.92633056640625, |
|
"logps/real": -247.98934936523438, |
|
"loss": 0.6967, |
|
"rewards/accuracies": 0.5, |
|
"rewards/generated": -0.004312173463404179, |
|
"rewards/margins": 0.004754967056214809, |
|
"rewards/real": 0.00044279481517151, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2388059701492537e-07, |
|
"logits/generated": 6.627219200134277, |
|
"logits/real": 4.5740742683410645, |
|
"logps/generated": -274.96441650390625, |
|
"logps/real": -227.3006591796875, |
|
"loss": 0.6883, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/generated": -0.019664818421006203, |
|
"rewards/margins": 0.017926085740327835, |
|
"rewards/real": -0.0017387343104928732, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.345415778251599e-07, |
|
"logits/generated": 6.484269618988037, |
|
"logits/real": 4.570880889892578, |
|
"logps/generated": -285.11175537109375, |
|
"logps/real": -222.089111328125, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/generated": -0.00420150812715292, |
|
"rewards/margins": -0.003182200016453862, |
|
"rewards/real": -0.007383708842098713, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.452025586353944e-07, |
|
"logits/generated": 6.622496128082275, |
|
"logits/real": 4.402390480041504, |
|
"logps/generated": -283.72247314453125, |
|
"logps/real": -238.875244140625, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -0.0316481851041317, |
|
"rewards/margins": 0.037973009049892426, |
|
"rewards/real": 0.00632482161745429, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.55863539445629e-07, |
|
"logits/generated": 6.519092559814453, |
|
"logits/real": 5.1569108963012695, |
|
"logps/generated": -294.76727294921875, |
|
"logps/real": -287.53533935546875, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/generated": -0.0013995547778904438, |
|
"rewards/margins": 0.0036062332801520824, |
|
"rewards/real": 0.002206678967922926, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.665245202558635e-07, |
|
"logits/generated": 6.4200615882873535, |
|
"logits/real": 4.65548038482666, |
|
"logps/generated": -285.3873596191406, |
|
"logps/real": -228.32009887695312, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/generated": 0.003386992961168289, |
|
"rewards/margins": 0.010126499459147453, |
|
"rewards/real": 0.013513492420315742, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7718550106609805e-07, |
|
"logits/generated": 6.4657487869262695, |
|
"logits/real": 4.301976203918457, |
|
"logps/generated": -280.96539306640625, |
|
"logps/real": -224.1595458984375, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/generated": -0.006485415156930685, |
|
"rewards/margins": 0.011584701016545296, |
|
"rewards/real": 0.005099285393953323, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.878464818763326e-07, |
|
"logits/generated": 6.452627658843994, |
|
"logits/real": 4.714515209197998, |
|
"logps/generated": -274.6763000488281, |
|
"logps/real": -239.6040802001953, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/generated": -0.011514711193740368, |
|
"rewards/margins": 0.023869235068559647, |
|
"rewards/real": 0.012354524806141853, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.985074626865671e-07, |
|
"logits/generated": 6.498050689697266, |
|
"logits/real": 4.76090669631958, |
|
"logps/generated": -270.68817138671875, |
|
"logps/real": -256.23040771484375, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/generated": -0.023040171712636948, |
|
"rewards/margins": 0.00843932293355465, |
|
"rewards/real": -0.014600845053792, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0916844349680174e-07, |
|
"logits/generated": 6.579047203063965, |
|
"logits/real": 4.5393829345703125, |
|
"logps/generated": -292.82879638671875, |
|
"logps/real": -251.0160369873047, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/generated": -0.0038245960604399443, |
|
"rewards/margins": 0.011936083436012268, |
|
"rewards/real": 0.008111484348773956, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1982942430703626e-07, |
|
"logits/generated": 6.381264686584473, |
|
"logits/real": 4.537992477416992, |
|
"logps/generated": -273.74224853515625, |
|
"logps/real": -239.7407684326172, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/generated": -0.004199598915874958, |
|
"rewards/margins": 0.017961198464035988, |
|
"rewards/real": 0.01376159768551588, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.304904051172708e-07, |
|
"logits/generated": 6.469088554382324, |
|
"logits/real": 4.945279598236084, |
|
"logps/generated": -275.0647277832031, |
|
"logps/real": -260.91900634765625, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.625, |
|
"rewards/generated": -0.0022400771267712116, |
|
"rewards/margins": 0.012093605473637581, |
|
"rewards/real": 0.009853528812527657, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.411513859275053e-07, |
|
"logits/generated": 6.3869123458862305, |
|
"logits/real": 4.953930854797363, |
|
"logps/generated": -277.23260498046875, |
|
"logps/real": -258.80120849609375, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/generated": 0.0009156037122011185, |
|
"rewards/margins": 0.01676994189620018, |
|
"rewards/real": 0.01768554374575615, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.5181236673773984e-07, |
|
"logits/generated": 6.399374961853027, |
|
"logits/real": 4.842419147491455, |
|
"logps/generated": -270.574951171875, |
|
"logps/real": -259.90386962890625, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.014367667958140373, |
|
"rewards/margins": 0.042558759450912476, |
|
"rewards/real": 0.0281910952180624, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.6247334754797437e-07, |
|
"logits/generated": 6.49670934677124, |
|
"logits/real": 4.750965118408203, |
|
"logps/generated": -281.72406005859375, |
|
"logps/real": -254.224853515625, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/generated": 0.0038049505092203617, |
|
"rewards/margins": 0.02935839258134365, |
|
"rewards/real": 0.03316333517432213, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.7313432835820895e-07, |
|
"logits/generated": 6.555943965911865, |
|
"logits/real": 4.749678134918213, |
|
"logps/generated": -271.36529541015625, |
|
"logps/real": -246.26235961914062, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/generated": -0.008218175731599331, |
|
"rewards/margins": 0.044897980988025665, |
|
"rewards/real": 0.03667980432510376, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8379530916844347e-07, |
|
"logits/generated": 6.510591983795166, |
|
"logits/real": 4.735245227813721, |
|
"logps/generated": -277.96197509765625, |
|
"logps/real": -237.43386840820312, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/generated": -0.022423155605793, |
|
"rewards/margins": 0.0445123165845871, |
|
"rewards/real": 0.022089168429374695, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9445628997867805e-07, |
|
"logits/generated": 6.379269599914551, |
|
"logits/real": 5.035122871398926, |
|
"logps/generated": -270.5735778808594, |
|
"logps/real": -274.4202880859375, |
|
"loss": 0.6704, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/generated": -0.0076493457891047, |
|
"rewards/margins": 0.043878089636564255, |
|
"rewards/real": 0.03622874245047569, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.051172707889126e-07, |
|
"logits/generated": 6.588892459869385, |
|
"logits/real": 4.547529697418213, |
|
"logps/generated": -293.5452880859375, |
|
"logps/real": -258.05047607421875, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/generated": -0.014542962424457073, |
|
"rewards/margins": 0.04654566943645477, |
|
"rewards/real": 0.032002706080675125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.157782515991471e-07, |
|
"logits/generated": 6.528749942779541, |
|
"logits/real": 4.817520618438721, |
|
"logps/generated": -278.42205810546875, |
|
"logps/real": -245.11782836914062, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/generated": -0.01723286882042885, |
|
"rewards/margins": 0.05723923444747925, |
|
"rewards/real": 0.0400063656270504, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2643923240938163e-07, |
|
"logits/generated": 6.4185590744018555, |
|
"logits/real": 4.532934665679932, |
|
"logps/generated": -268.5301208496094, |
|
"logps/real": -250.79397583007812, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/generated": -0.02642243169248104, |
|
"rewards/margins": 0.08120186626911163, |
|
"rewards/real": 0.05477944016456604, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.371002132196162e-07, |
|
"logits/generated": 6.537232875823975, |
|
"logits/real": 4.77290678024292, |
|
"logps/generated": -285.8045959472656, |
|
"logps/real": -244.49655151367188, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": -0.012522486969828606, |
|
"rewards/margins": 0.08892401307821274, |
|
"rewards/real": 0.07640153169631958, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4776119402985074e-07, |
|
"logits/generated": 6.560798645019531, |
|
"logits/real": 4.509766578674316, |
|
"logps/generated": -264.9848327636719, |
|
"logps/real": -250.44418334960938, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/generated": -0.00984070636332035, |
|
"rewards/margins": 0.07440716028213501, |
|
"rewards/real": 0.06456644833087921, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5842217484008526e-07, |
|
"logits/generated": 6.5955400466918945, |
|
"logits/real": 4.605684757232666, |
|
"logps/generated": -271.46160888671875, |
|
"logps/real": -233.9333953857422, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -0.016519565135240555, |
|
"rewards/margins": 0.10620995610952377, |
|
"rewards/real": 0.08969040215015411, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.690831556503198e-07, |
|
"logits/generated": 6.498379707336426, |
|
"logits/real": 4.555215358734131, |
|
"logps/generated": -275.9375, |
|
"logps/real": -245.8618621826172, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/generated": -0.019702184945344925, |
|
"rewards/margins": 0.10273710638284683, |
|
"rewards/real": 0.08303491771221161, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.797441364605543e-07, |
|
"logits/generated": 6.488035678863525, |
|
"logits/real": 4.758559226989746, |
|
"logps/generated": -292.8480224609375, |
|
"logps/real": -235.81704711914062, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.0321931466460228, |
|
"rewards/margins": 0.13845106959342957, |
|
"rewards/real": 0.10625793039798737, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.904051172707888e-07, |
|
"logits/generated": 6.370500564575195, |
|
"logits/real": 5.3891401290893555, |
|
"logps/generated": -288.18426513671875, |
|
"logps/real": -262.585693359375, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -0.047702450305223465, |
|
"rewards/margins": 0.16840405762195587, |
|
"rewards/real": 0.12070159614086151, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.998815165876776e-07, |
|
"logits/generated": 6.4978346824646, |
|
"logits/real": 4.823352813720703, |
|
"logps/generated": -282.669921875, |
|
"logps/real": -243.49526977539062, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/generated": -0.03011218085885048, |
|
"rewards/margins": 0.1787882149219513, |
|
"rewards/real": 0.14867602288722992, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.98696682464455e-07, |
|
"logits/generated": 6.336424350738525, |
|
"logits/real": 4.733750343322754, |
|
"logps/generated": -267.9712829589844, |
|
"logps/real": -241.704345703125, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/generated": -0.01858561486005783, |
|
"rewards/margins": 0.17029884457588196, |
|
"rewards/real": 0.15171322226524353, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.975118483412322e-07, |
|
"logits/generated": 6.465763092041016, |
|
"logits/real": 4.750241279602051, |
|
"logps/generated": -276.46527099609375, |
|
"logps/real": -227.153564453125, |
|
"loss": 0.5994, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": -0.020980175584554672, |
|
"rewards/margins": 0.18550223112106323, |
|
"rewards/real": 0.16452205181121826, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963270142180094e-07, |
|
"logits/generated": 6.40543270111084, |
|
"logits/real": 4.948707103729248, |
|
"logps/generated": -280.562255859375, |
|
"logps/real": -245.1046142578125, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.08627736568450928, |
|
"rewards/margins": 0.2752222418785095, |
|
"rewards/real": 0.18894490599632263, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.951421800947867e-07, |
|
"logits/generated": 6.454255104064941, |
|
"logits/real": 4.428702354431152, |
|
"logps/generated": -277.8967590332031, |
|
"logps/real": -239.1210479736328, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.04224959388375282, |
|
"rewards/margins": 0.2869378924369812, |
|
"rewards/real": 0.24468836188316345, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.93957345971564e-07, |
|
"logits/generated": 6.436443328857422, |
|
"logits/real": 4.778555393218994, |
|
"logps/generated": -269.21307373046875, |
|
"logps/real": -231.3462677001953, |
|
"loss": 0.5635, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.027034465223550797, |
|
"rewards/margins": 0.30413612723350525, |
|
"rewards/real": 0.27710166573524475, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.927725118483413e-07, |
|
"logits/generated": 6.379159927368164, |
|
"logits/real": 4.740503787994385, |
|
"logps/generated": -271.0333557128906, |
|
"logps/real": -231.5590057373047, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.03343794867396355, |
|
"rewards/margins": 0.2944517731666565, |
|
"rewards/real": 0.26101383566856384, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.915876777251184e-07, |
|
"logits/generated": 6.4404754638671875, |
|
"logits/real": 4.698214054107666, |
|
"logps/generated": -269.7973937988281, |
|
"logps/real": -245.9893798828125, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.03939288109540939, |
|
"rewards/margins": 0.3222863972187042, |
|
"rewards/real": 0.28289347887039185, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.904028436018957e-07, |
|
"logits/generated": 6.421341896057129, |
|
"logits/real": 4.729471683502197, |
|
"logps/generated": -283.0313720703125, |
|
"logps/real": -245.7028350830078, |
|
"loss": 0.5334, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.06327588111162186, |
|
"rewards/margins": 0.36130860447883606, |
|
"rewards/real": 0.2980327010154724, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.892180094786729e-07, |
|
"logits/generated": 6.3695831298828125, |
|
"logits/real": 4.896943092346191, |
|
"logps/generated": -275.6328125, |
|
"logps/real": -234.8939666748047, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.043337948620319366, |
|
"rewards/margins": 0.3582867383956909, |
|
"rewards/real": 0.31494876742362976, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.880331753554502e-07, |
|
"logits/generated": 6.409314155578613, |
|
"logits/real": 4.403968811035156, |
|
"logps/generated": -288.6573181152344, |
|
"logps/real": -252.0238800048828, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.09962789714336395, |
|
"rewards/margins": 0.4413130283355713, |
|
"rewards/real": 0.34168511629104614, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.868483412322275e-07, |
|
"logits/generated": 6.514178276062012, |
|
"logits/real": 4.654687404632568, |
|
"logps/generated": -276.91796875, |
|
"logps/real": -247.6042938232422, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.07225228101015091, |
|
"rewards/margins": 0.36416369676589966, |
|
"rewards/real": 0.29191142320632935, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.856635071090047e-07, |
|
"logits/generated": 6.484377861022949, |
|
"logits/real": 4.610289096832275, |
|
"logps/generated": -287.1705322265625, |
|
"logps/real": -248.52066040039062, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.12423114478588104, |
|
"rewards/margins": 0.5095083713531494, |
|
"rewards/real": 0.38527724146842957, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84478672985782e-07, |
|
"logits/generated": 6.429436683654785, |
|
"logits/real": 4.436240196228027, |
|
"logps/generated": -285.28814697265625, |
|
"logps/real": -233.54312133789062, |
|
"loss": 0.4702, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.11845574527978897, |
|
"rewards/margins": 0.5989123582839966, |
|
"rewards/real": 0.4804566502571106, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.832938388625591e-07, |
|
"logits/generated": 6.502951145172119, |
|
"logits/real": 4.6671462059021, |
|
"logps/generated": -281.2021179199219, |
|
"logps/real": -235.6467742919922, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.07756136357784271, |
|
"rewards/margins": 0.5416086316108704, |
|
"rewards/real": 0.46404728293418884, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.821090047393365e-07, |
|
"logits/generated": 6.4275922775268555, |
|
"logits/real": 4.628044128417969, |
|
"logps/generated": -287.75958251953125, |
|
"logps/real": -230.74954223632812, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.11417678743600845, |
|
"rewards/margins": 0.655004620552063, |
|
"rewards/real": 0.5408278703689575, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.809241706161137e-07, |
|
"logits/generated": 6.4412336349487305, |
|
"logits/real": 5.0383100509643555, |
|
"logps/generated": -262.37652587890625, |
|
"logps/real": -250.28353881835938, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.051359303295612335, |
|
"rewards/margins": 0.6307464838027954, |
|
"rewards/real": 0.5793871879577637, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.79739336492891e-07, |
|
"logits/generated": 6.4190993309021, |
|
"logits/real": 4.736918926239014, |
|
"logps/generated": -292.39813232421875, |
|
"logps/real": -238.57089233398438, |
|
"loss": 0.4305, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.06552235782146454, |
|
"rewards/margins": 0.6890299916267395, |
|
"rewards/real": 0.623507559299469, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.785545023696682e-07, |
|
"logits/generated": 6.509694576263428, |
|
"logits/real": 4.679415702819824, |
|
"logps/generated": -283.118896484375, |
|
"logps/real": -251.4104766845703, |
|
"loss": 0.414, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.12958040833473206, |
|
"rewards/margins": 0.7429105043411255, |
|
"rewards/real": 0.613330066204071, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.773696682464455e-07, |
|
"logits/generated": 6.4275031089782715, |
|
"logits/real": 4.612570285797119, |
|
"logps/generated": -283.679931640625, |
|
"logps/real": -234.7230987548828, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.12087702751159668, |
|
"rewards/margins": 0.8004452586174011, |
|
"rewards/real": 0.679568350315094, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7618483412322273e-07, |
|
"logits/generated": 6.529521942138672, |
|
"logits/real": 4.53403377532959, |
|
"logps/generated": -276.1392822265625, |
|
"logps/real": -226.3787078857422, |
|
"loss": 0.4186, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.10057850182056427, |
|
"rewards/margins": 0.805554211139679, |
|
"rewards/real": 0.7049756646156311, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7499999999999995e-07, |
|
"logits/generated": 6.591378688812256, |
|
"logits/real": 4.763890743255615, |
|
"logps/generated": -295.3109436035156, |
|
"logps/real": -235.9544677734375, |
|
"loss": 0.3916, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.20255105197429657, |
|
"rewards/margins": 0.8574334979057312, |
|
"rewards/real": 0.654882550239563, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.738151658767772e-07, |
|
"logits/generated": 6.50357723236084, |
|
"logits/real": 4.819157123565674, |
|
"logps/generated": -287.8582763671875, |
|
"logps/real": -247.63803100585938, |
|
"loss": 0.381, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.17564311623573303, |
|
"rewards/margins": 0.9119162559509277, |
|
"rewards/real": 0.7362731695175171, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.726303317535545e-07, |
|
"logits/generated": 6.48019552230835, |
|
"logits/real": 4.711845397949219, |
|
"logps/generated": -269.28668212890625, |
|
"logps/real": -223.31192016601562, |
|
"loss": 0.385, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.08161990344524384, |
|
"rewards/margins": 0.8838006258010864, |
|
"rewards/real": 0.8021806478500366, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7144549763033177e-07, |
|
"logits/generated": 6.481300354003906, |
|
"logits/real": 4.540422439575195, |
|
"logps/generated": -285.6842346191406, |
|
"logps/real": -235.37417602539062, |
|
"loss": 0.3653, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.19102832674980164, |
|
"rewards/margins": 0.9631514549255371, |
|
"rewards/real": 0.7721230983734131, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.70260663507109e-07, |
|
"logits/generated": 6.4215497970581055, |
|
"logits/real": 4.966759204864502, |
|
"logps/generated": -279.190185546875, |
|
"logps/real": -242.8605194091797, |
|
"loss": 0.352, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.12265386432409286, |
|
"rewards/margins": 0.973602294921875, |
|
"rewards/real": 0.8509486317634583, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.690758293838862e-07, |
|
"logits/generated": 6.504040718078613, |
|
"logits/real": 5.0368146896362305, |
|
"logps/generated": -269.21197509765625, |
|
"logps/real": -249.8148956298828, |
|
"loss": 0.3605, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.09707117080688477, |
|
"rewards/margins": 0.9983582496643066, |
|
"rewards/real": 0.9012872576713562, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.678909952606635e-07, |
|
"logits/generated": 6.4590349197387695, |
|
"logits/real": 4.72456693649292, |
|
"logps/generated": -287.1020202636719, |
|
"logps/real": -243.59365844726562, |
|
"loss": 0.3471, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.19013145565986633, |
|
"rewards/margins": 1.1382641792297363, |
|
"rewards/real": 0.9481328129768372, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.667061611374407e-07, |
|
"logits/generated": 6.586329460144043, |
|
"logits/real": 4.696109771728516, |
|
"logps/generated": -294.53399658203125, |
|
"logps/real": -256.1171569824219, |
|
"loss": 0.3363, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.23052604496479034, |
|
"rewards/margins": 1.134413242340088, |
|
"rewards/real": 0.9038872718811035, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.65521327014218e-07, |
|
"logits/generated": 6.411507606506348, |
|
"logits/real": 5.131626129150391, |
|
"logps/generated": -288.7245178222656, |
|
"logps/real": -269.36541748046875, |
|
"loss": 0.3392, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.23499789834022522, |
|
"rewards/margins": 1.1348694562911987, |
|
"rewards/real": 0.8998715281486511, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6433649289099525e-07, |
|
"logits/generated": 6.4309258460998535, |
|
"logits/real": 4.649967193603516, |
|
"logps/generated": -277.1844787597656, |
|
"logps/real": -242.71011352539062, |
|
"loss": 0.3126, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.2691335380077362, |
|
"rewards/margins": 1.2196036577224731, |
|
"rewards/real": 0.9504700899124146, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.631516587677725e-07, |
|
"logits/generated": 6.520060062408447, |
|
"logits/real": 4.70443058013916, |
|
"logps/generated": -276.5824890136719, |
|
"logps/real": -213.98782348632812, |
|
"loss": 0.3163, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.2217775583267212, |
|
"rewards/margins": 1.2883174419403076, |
|
"rewards/real": 1.0665398836135864, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6196682464454974e-07, |
|
"logits/generated": 6.340624809265137, |
|
"logits/real": 4.910890102386475, |
|
"logps/generated": -276.492431640625, |
|
"logps/real": -250.92135620117188, |
|
"loss": 0.3078, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.26993709802627563, |
|
"rewards/margins": 1.3064370155334473, |
|
"rewards/real": 1.0364999771118164, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.60781990521327e-07, |
|
"logits/generated": 6.535134315490723, |
|
"logits/real": 4.642252445220947, |
|
"logps/generated": -282.04254150390625, |
|
"logps/real": -224.3877716064453, |
|
"loss": 0.2998, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.2987816333770752, |
|
"rewards/margins": 1.3563276529312134, |
|
"rewards/real": 1.057545781135559, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5959715639810423e-07, |
|
"logits/generated": 6.41034460067749, |
|
"logits/real": 4.7313551902771, |
|
"logps/generated": -269.9966735839844, |
|
"logps/real": -222.67361450195312, |
|
"loss": 0.3116, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.1768857091665268, |
|
"rewards/margins": 1.3481709957122803, |
|
"rewards/real": 1.1712852716445923, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5841232227488145e-07, |
|
"logits/generated": 6.5445876121521, |
|
"logits/real": 4.706090450286865, |
|
"logps/generated": -292.2023010253906, |
|
"logps/real": -230.2037353515625, |
|
"loss": 0.3059, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.3371911942958832, |
|
"rewards/margins": 1.470198392868042, |
|
"rewards/real": 1.1330074071884155, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5722748815165873e-07, |
|
"logits/generated": 6.492764949798584, |
|
"logits/real": 4.727120876312256, |
|
"logps/generated": -286.11614990234375, |
|
"logps/real": -236.4132080078125, |
|
"loss": 0.2588, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.34763607382774353, |
|
"rewards/margins": 1.618819236755371, |
|
"rewards/real": 1.2711832523345947, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.56042654028436e-07, |
|
"logits/generated": 6.487623691558838, |
|
"logits/real": 4.916855335235596, |
|
"logps/generated": -301.30670166015625, |
|
"logps/real": -261.5294494628906, |
|
"loss": 0.2905, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.26863616704940796, |
|
"rewards/margins": 1.2958793640136719, |
|
"rewards/real": 1.0272432565689087, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5485781990521327e-07, |
|
"logits/generated": 6.653237342834473, |
|
"logits/real": 4.623661518096924, |
|
"logps/generated": -285.1461181640625, |
|
"logps/real": -243.27243041992188, |
|
"loss": 0.3001, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.20099958777427673, |
|
"rewards/margins": 1.3751389980316162, |
|
"rewards/real": 1.174139380455017, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.536729857819905e-07, |
|
"logits/generated": 6.405764579772949, |
|
"logits/real": 4.904845714569092, |
|
"logps/generated": -265.61737060546875, |
|
"logps/real": -236.12594604492188, |
|
"loss": 0.2656, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.2864135205745697, |
|
"rewards/margins": 1.4618184566497803, |
|
"rewards/real": 1.1754049062728882, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5248815165876776e-07, |
|
"logits/generated": 6.408907890319824, |
|
"logits/real": 4.733492374420166, |
|
"logps/generated": -282.3438415527344, |
|
"logps/real": -248.62728881835938, |
|
"loss": 0.2501, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.32550299167633057, |
|
"rewards/margins": 1.5754355192184448, |
|
"rewards/real": 1.2499325275421143, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.5130331753554504e-07, |
|
"logits/generated": 6.523440361022949, |
|
"logits/real": 4.893500804901123, |
|
"logps/generated": -274.6362609863281, |
|
"logps/real": -239.6786346435547, |
|
"loss": 0.2517, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.37395572662353516, |
|
"rewards/margins": 1.6604623794555664, |
|
"rewards/real": 1.2865066528320312, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5011848341232226e-07, |
|
"logits/generated": 6.497984886169434, |
|
"logits/real": 4.835686683654785, |
|
"logps/generated": -284.5157165527344, |
|
"logps/real": -240.1763458251953, |
|
"loss": 0.2639, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.3406885862350464, |
|
"rewards/margins": 1.6514053344726562, |
|
"rewards/real": 1.3107168674468994, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.489336492890995e-07, |
|
"logits/generated": 6.601628303527832, |
|
"logits/real": 4.59323787689209, |
|
"logps/generated": -289.57421875, |
|
"logps/real": -227.0326690673828, |
|
"loss": 0.2481, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.4485829770565033, |
|
"rewards/margins": 1.7304086685180664, |
|
"rewards/real": 1.2818256616592407, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4774881516587675e-07, |
|
"logits/generated": 6.438521385192871, |
|
"logits/real": 5.212011814117432, |
|
"logps/generated": -274.275146484375, |
|
"logps/real": -256.7066345214844, |
|
"loss": 0.2254, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.3772863745689392, |
|
"rewards/margins": 1.8105132579803467, |
|
"rewards/real": 1.4332268238067627, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.46563981042654e-07, |
|
"logits/generated": 6.509451389312744, |
|
"logits/real": 4.610627174377441, |
|
"logps/generated": -291.47552490234375, |
|
"logps/real": -251.83706665039062, |
|
"loss": 0.2307, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.4892211854457855, |
|
"rewards/margins": 1.7918579578399658, |
|
"rewards/real": 1.3026366233825684, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.4537914691943124e-07, |
|
"logits/generated": 6.490546226501465, |
|
"logits/real": 4.814209938049316, |
|
"logps/generated": -273.9214782714844, |
|
"logps/real": -221.7511444091797, |
|
"loss": 0.2508, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.4122149348258972, |
|
"rewards/margins": 1.7060003280639648, |
|
"rewards/real": 1.2937853336334229, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.441943127962085e-07, |
|
"logits/generated": 6.425353050231934, |
|
"logits/real": 5.0255045890808105, |
|
"logps/generated": -280.7168884277344, |
|
"logps/real": -247.033935546875, |
|
"loss": 0.2304, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.47266262769699097, |
|
"rewards/margins": 1.8882691860198975, |
|
"rewards/real": 1.4156067371368408, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.430094786729858e-07, |
|
"logits/generated": 6.480774879455566, |
|
"logits/real": 4.786948204040527, |
|
"logps/generated": -291.06512451171875, |
|
"logps/real": -209.83615112304688, |
|
"loss": 0.2214, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.4364975094795227, |
|
"rewards/margins": 1.8951022624969482, |
|
"rewards/real": 1.4586045742034912, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4182464454976306e-07, |
|
"logits/generated": 6.375167369842529, |
|
"logits/real": 5.035046577453613, |
|
"logps/generated": -276.30718994140625, |
|
"logps/real": -235.35733032226562, |
|
"loss": 0.2082, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.40835681557655334, |
|
"rewards/margins": 2.0441970825195312, |
|
"rewards/real": 1.6358401775360107, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4063981042654023e-07, |
|
"logits/generated": 6.5355730056762695, |
|
"logits/real": 4.934959888458252, |
|
"logps/generated": -287.76385498046875, |
|
"logps/real": -228.20425415039062, |
|
"loss": 0.1887, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.589410126209259, |
|
"rewards/margins": 2.164210557937622, |
|
"rewards/real": 1.5748002529144287, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.394549763033175e-07, |
|
"logits/generated": 6.452719211578369, |
|
"logits/real": 4.160326957702637, |
|
"logps/generated": -283.5166931152344, |
|
"logps/real": -217.1543426513672, |
|
"loss": 0.2238, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.24201051890850067, |
|
"rewards/margins": 2.0650689601898193, |
|
"rewards/real": 1.8230584859848022, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.382701421800948e-07, |
|
"logits/generated": 6.507842063903809, |
|
"logits/real": 5.069902420043945, |
|
"logps/generated": -269.7446594238281, |
|
"logps/real": -248.04714965820312, |
|
"loss": 0.1762, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.44552716612815857, |
|
"rewards/margins": 2.1091365814208984, |
|
"rewards/real": 1.6636091470718384, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.37085308056872e-07, |
|
"logits/generated": 6.380208492279053, |
|
"logits/real": 4.518065452575684, |
|
"logps/generated": -274.2218322753906, |
|
"logps/real": -219.1566925048828, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.46542254090309143, |
|
"rewards/margins": 2.0480804443359375, |
|
"rewards/real": 1.5826579332351685, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3590047393364927e-07, |
|
"logits/generated": 6.60736608505249, |
|
"logits/real": 4.692660331726074, |
|
"logps/generated": -273.3490295410156, |
|
"logps/real": -219.55429077148438, |
|
"loss": 0.2376, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.2550446093082428, |
|
"rewards/margins": 1.8392921686172485, |
|
"rewards/real": 1.5842477083206177, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.3471563981042654e-07, |
|
"logits/generated": 6.539947509765625, |
|
"logits/real": 4.22275447845459, |
|
"logps/generated": -281.519775390625, |
|
"logps/real": -199.8513641357422, |
|
"loss": 0.1947, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.596612811088562, |
|
"rewards/margins": 2.411491870880127, |
|
"rewards/real": 1.8148788213729858, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.335308056872038e-07, |
|
"logits/generated": 6.498897552490234, |
|
"logits/real": 4.682340621948242, |
|
"logps/generated": -276.3707580566406, |
|
"logps/real": -240.2972412109375, |
|
"loss": 0.2016, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.45306864380836487, |
|
"rewards/margins": 1.9954931735992432, |
|
"rewards/real": 1.5424243211746216, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.32345971563981e-07, |
|
"logits/generated": 6.35316276550293, |
|
"logits/real": 4.76320219039917, |
|
"logps/generated": -291.69427490234375, |
|
"logps/real": -244.2530517578125, |
|
"loss": 0.1921, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.6429895162582397, |
|
"rewards/margins": 2.3938496112823486, |
|
"rewards/real": 1.7508599758148193, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.3116113744075825e-07, |
|
"logits/generated": 6.538656711578369, |
|
"logits/real": 4.506134986877441, |
|
"logps/generated": -297.8096923828125, |
|
"logps/real": -223.4852294921875, |
|
"loss": 0.1945, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.7660019397735596, |
|
"rewards/margins": 2.628037214279175, |
|
"rewards/real": 1.8620353937149048, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.299763033175355e-07, |
|
"logits/generated": 6.559047698974609, |
|
"logits/real": 4.753483772277832, |
|
"logps/generated": -283.09259033203125, |
|
"logps/real": -231.58828735351562, |
|
"loss": 0.1659, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.5766782164573669, |
|
"rewards/margins": 2.1829142570495605, |
|
"rewards/real": 1.6062358617782593, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2879146919431274e-07, |
|
"logits/generated": 6.543219089508057, |
|
"logits/real": 4.634883880615234, |
|
"logps/generated": -292.83892822265625, |
|
"logps/real": -237.899658203125, |
|
"loss": 0.1848, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.7167092561721802, |
|
"rewards/margins": 2.470365524291992, |
|
"rewards/real": 1.7536563873291016, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2760663507109e-07, |
|
"logits/generated": 6.605074405670166, |
|
"logits/real": 4.641299724578857, |
|
"logps/generated": -273.18243408203125, |
|
"logps/real": -222.3721923828125, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.5909595489501953, |
|
"rewards/margins": 2.6222665309906006, |
|
"rewards/real": 2.031306743621826, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.264218009478673e-07, |
|
"logits/generated": 6.473427772521973, |
|
"logits/real": 4.688056945800781, |
|
"logps/generated": -294.8439636230469, |
|
"logps/real": -218.5131378173828, |
|
"loss": 0.184, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.6649213433265686, |
|
"rewards/margins": 2.604104518890381, |
|
"rewards/real": 1.9391835927963257, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.2523696682464456e-07, |
|
"logits/generated": 6.37612247467041, |
|
"logits/real": 4.925015449523926, |
|
"logps/generated": -288.498779296875, |
|
"logps/real": -232.14883422851562, |
|
"loss": 0.1871, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.5896228551864624, |
|
"rewards/margins": 2.15082049369812, |
|
"rewards/real": 1.5611976385116577, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.240521327014218e-07, |
|
"logits/generated": 6.474294185638428, |
|
"logits/real": 4.607337474822998, |
|
"logps/generated": -271.22357177734375, |
|
"logps/real": -217.6533660888672, |
|
"loss": 0.1953, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.616405189037323, |
|
"rewards/margins": 2.434063196182251, |
|
"rewards/real": 1.8176580667495728, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.22867298578199e-07, |
|
"logits/generated": 6.460890769958496, |
|
"logits/real": 4.362028121948242, |
|
"logps/generated": -294.4446105957031, |
|
"logps/real": -233.0, |
|
"loss": 0.1522, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.9041115641593933, |
|
"rewards/margins": 2.483468770980835, |
|
"rewards/real": 1.5793571472167969, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.216824644549763e-07, |
|
"logits/generated": 6.5067267417907715, |
|
"logits/real": 4.801357269287109, |
|
"logps/generated": -283.41253662109375, |
|
"logps/real": -229.07308959960938, |
|
"loss": 0.1674, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.8184356689453125, |
|
"rewards/margins": 2.9656457901000977, |
|
"rewards/real": 2.147209644317627, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2049763033175355e-07, |
|
"logits/generated": 6.525651454925537, |
|
"logits/real": 4.5972700119018555, |
|
"logps/generated": -273.68255615234375, |
|
"logps/real": -226.0257568359375, |
|
"loss": 0.1825, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.7906166315078735, |
|
"rewards/margins": 2.4105656147003174, |
|
"rewards/real": 1.6199489831924438, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1931279620853077e-07, |
|
"logits/generated": 6.486138820648193, |
|
"logits/real": 4.842376232147217, |
|
"logps/generated": -297.69329833984375, |
|
"logps/real": -236.61843872070312, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.8111250996589661, |
|
"rewards/margins": 2.760737657546997, |
|
"rewards/real": 1.9496123790740967, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1812796208530804e-07, |
|
"logits/generated": 6.288398265838623, |
|
"logits/real": 5.120048522949219, |
|
"logps/generated": -302.489990234375, |
|
"logps/real": -220.8452606201172, |
|
"loss": 0.1719, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.9194461107254028, |
|
"rewards/margins": 2.725492000579834, |
|
"rewards/real": 1.8060458898544312, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.169431279620853e-07, |
|
"logits/generated": 6.422547817230225, |
|
"logits/real": 5.059387683868408, |
|
"logps/generated": -285.9918518066406, |
|
"logps/real": -246.36697387695312, |
|
"loss": 0.1815, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.8091152310371399, |
|
"rewards/margins": 2.879631280899048, |
|
"rewards/real": 2.0705161094665527, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1575829383886253e-07, |
|
"logits/generated": 6.4782843589782715, |
|
"logits/real": 4.969704627990723, |
|
"logps/generated": -287.41973876953125, |
|
"logps/real": -207.5157928466797, |
|
"loss": 0.191, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.8577953577041626, |
|
"rewards/margins": 2.5767669677734375, |
|
"rewards/real": 1.718971848487854, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.145734597156398e-07, |
|
"logits/generated": 6.450139045715332, |
|
"logits/real": 4.915671348571777, |
|
"logps/generated": -270.8932189941406, |
|
"logps/real": -217.2313995361328, |
|
"loss": 0.172, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -0.7773370146751404, |
|
"rewards/margins": 2.6479990482330322, |
|
"rewards/real": 1.870661973953247, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.1338862559241703e-07, |
|
"logits/generated": 6.705965518951416, |
|
"logits/real": 4.81022834777832, |
|
"logps/generated": -294.56597900390625, |
|
"logps/real": -211.1824493408203, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.0764670372009277, |
|
"rewards/margins": 3.1245856285095215, |
|
"rewards/real": 2.0481185913085938, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.122037914691943e-07, |
|
"logits/generated": 6.361940860748291, |
|
"logits/real": 4.980579376220703, |
|
"logps/generated": -287.1803283691406, |
|
"logps/real": -244.1328125, |
|
"loss": 0.1562, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.976216197013855, |
|
"rewards/margins": 2.9318318367004395, |
|
"rewards/real": 1.9556156396865845, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.110189573459715e-07, |
|
"logits/generated": 6.496147155761719, |
|
"logits/real": 4.533209800720215, |
|
"logps/generated": -292.2799377441406, |
|
"logps/real": -220.11685180664062, |
|
"loss": 0.145, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.9167261123657227, |
|
"rewards/margins": 3.0090811252593994, |
|
"rewards/real": 2.0923550128936768, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.098341232227488e-07, |
|
"logits/generated": 6.39020299911499, |
|
"logits/real": 4.5528459548950195, |
|
"logps/generated": -286.52435302734375, |
|
"logps/real": -214.3458251953125, |
|
"loss": 0.1624, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -0.9529097676277161, |
|
"rewards/margins": 2.9751744270324707, |
|
"rewards/real": 2.0222644805908203, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.0864928909952607e-07, |
|
"logits/generated": 6.4211554527282715, |
|
"logits/real": 4.6509599685668945, |
|
"logps/generated": -282.4293518066406, |
|
"logps/real": -223.8668975830078, |
|
"loss": 0.167, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.7745916247367859, |
|
"rewards/margins": 2.5993974208831787, |
|
"rewards/real": 1.8248056173324585, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.074644549763033e-07, |
|
"logits/generated": 6.483539581298828, |
|
"logits/real": 4.578310489654541, |
|
"logps/generated": -286.9674987792969, |
|
"logps/real": -238.4805145263672, |
|
"loss": 0.1563, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -0.8147264719009399, |
|
"rewards/margins": 2.793470859527588, |
|
"rewards/real": 1.9787445068359375, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0627962085308056e-07, |
|
"logits/generated": 6.479307651519775, |
|
"logits/real": 4.371944427490234, |
|
"logps/generated": -293.8260803222656, |
|
"logps/real": -221.93624877929688, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.0739367008209229, |
|
"rewards/margins": 2.962772846221924, |
|
"rewards/real": 1.888836145401001, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0509478672985783e-07, |
|
"logits/generated": 6.399652004241943, |
|
"logits/real": 5.044413089752197, |
|
"logps/generated": -293.4299621582031, |
|
"logps/real": -239.0977325439453, |
|
"loss": 0.1582, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.0476984977722168, |
|
"rewards/margins": 3.119697332382202, |
|
"rewards/real": 2.0719990730285645, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0390995260663505e-07, |
|
"logits/generated": 6.399343967437744, |
|
"logits/real": 5.123462677001953, |
|
"logps/generated": -286.2010498046875, |
|
"logps/real": -238.5330352783203, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.8854296803474426, |
|
"rewards/margins": 2.840718984603882, |
|
"rewards/real": 1.9552892446517944, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0272511848341227e-07, |
|
"logits/generated": 6.5136399269104, |
|
"logits/real": 5.123744487762451, |
|
"logps/generated": -277.1810607910156, |
|
"logps/real": -224.10684204101562, |
|
"loss": 0.1746, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.9058412313461304, |
|
"rewards/margins": 2.760077953338623, |
|
"rewards/real": 1.8542366027832031, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0154028436018954e-07, |
|
"logits/generated": 6.532641410827637, |
|
"logits/real": 4.748272895812988, |
|
"logps/generated": -294.2531433105469, |
|
"logps/real": -227.92483520507812, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.3061493635177612, |
|
"rewards/margins": 3.458925724029541, |
|
"rewards/real": 2.152775764465332, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.003554502369668e-07, |
|
"logits/generated": 6.615334987640381, |
|
"logits/real": 4.404749393463135, |
|
"logps/generated": -301.03900146484375, |
|
"logps/real": -221.3147430419922, |
|
"loss": 0.1335, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.0821572542190552, |
|
"rewards/margins": 3.1977028846740723, |
|
"rewards/real": 2.1155455112457275, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.991706161137441e-07, |
|
"logits/generated": 6.418117523193359, |
|
"logits/real": 4.882086753845215, |
|
"logps/generated": -294.2748718261719, |
|
"logps/real": -232.966552734375, |
|
"loss": 0.133, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.2364888191223145, |
|
"rewards/margins": 3.057285785675049, |
|
"rewards/real": 1.8207969665527344, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.979857819905213e-07, |
|
"logits/generated": 6.474581241607666, |
|
"logits/real": 4.630919933319092, |
|
"logps/generated": -291.40313720703125, |
|
"logps/real": -231.16006469726562, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.1898620128631592, |
|
"rewards/margins": 3.224040985107422, |
|
"rewards/real": 2.0341789722442627, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968009478672986e-07, |
|
"logits/generated": 6.528960227966309, |
|
"logits/real": 4.991686820983887, |
|
"logps/generated": -280.37518310546875, |
|
"logps/real": -234.42623901367188, |
|
"loss": 0.1544, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.0884606838226318, |
|
"rewards/margins": 3.3553130626678467, |
|
"rewards/real": 2.2668521404266357, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.9561611374407585e-07, |
|
"logits/generated": 6.5070061683654785, |
|
"logits/real": 4.700498104095459, |
|
"logps/generated": -285.8040466308594, |
|
"logps/real": -211.8716278076172, |
|
"loss": 0.15, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -1.127325177192688, |
|
"rewards/margins": 3.0416786670684814, |
|
"rewards/real": 1.914353370666504, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.94431279620853e-07, |
|
"logits/generated": 6.504839897155762, |
|
"logits/real": 4.721653938293457, |
|
"logps/generated": -281.8013610839844, |
|
"logps/real": -217.22647094726562, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.1646500825881958, |
|
"rewards/margins": 3.471494674682617, |
|
"rewards/real": 2.306844711303711, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.932464454976303e-07, |
|
"logits/generated": 6.476781368255615, |
|
"logits/real": 4.780390739440918, |
|
"logps/generated": -298.7755432128906, |
|
"logps/real": -242.40310668945312, |
|
"loss": 0.147, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.1053074598312378, |
|
"rewards/margins": 3.288050889968872, |
|
"rewards/real": 2.1827430725097656, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9206161137440757e-07, |
|
"logits/generated": 6.5268354415893555, |
|
"logits/real": 4.777252197265625, |
|
"logps/generated": -302.7318115234375, |
|
"logps/real": -233.9207305908203, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.3840343952178955, |
|
"rewards/margins": 3.5235512256622314, |
|
"rewards/real": 2.139516592025757, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9087677725118484e-07, |
|
"logits/generated": 6.526573181152344, |
|
"logits/real": 4.747769355773926, |
|
"logps/generated": -291.31085205078125, |
|
"logps/real": -239.3638916015625, |
|
"loss": 0.1283, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.26791250705719, |
|
"rewards/margins": 3.175063133239746, |
|
"rewards/real": 1.9071502685546875, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8969194312796206e-07, |
|
"logits/generated": 6.308130264282227, |
|
"logits/real": 4.654840469360352, |
|
"logps/generated": -301.1564636230469, |
|
"logps/real": -234.5809326171875, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.2920730113983154, |
|
"rewards/margins": 3.129149913787842, |
|
"rewards/real": 1.8370771408081055, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8850710900473933e-07, |
|
"logits/generated": 6.504507541656494, |
|
"logits/real": 4.65291166305542, |
|
"logps/generated": -305.6498107910156, |
|
"logps/real": -233.3607635498047, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.4324750900268555, |
|
"rewards/margins": 3.3325772285461426, |
|
"rewards/real": 1.9001020193099976, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.873222748815166e-07, |
|
"logits/generated": 6.564852237701416, |
|
"logits/real": 4.4015302658081055, |
|
"logps/generated": -288.9748840332031, |
|
"logps/real": -222.2322235107422, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.43608820438385, |
|
"rewards/margins": 3.308760404586792, |
|
"rewards/real": 1.8726723194122314, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8613744075829377e-07, |
|
"logits/generated": 6.503379821777344, |
|
"logits/real": 4.592303276062012, |
|
"logps/generated": -289.198486328125, |
|
"logps/real": -226.54165649414062, |
|
"loss": 0.1142, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.3943302631378174, |
|
"rewards/margins": 3.6817550659179688, |
|
"rewards/real": 2.2874248027801514, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8495260663507104e-07, |
|
"logits/generated": 6.395134925842285, |
|
"logits/real": 4.751669883728027, |
|
"logps/generated": -280.8811340332031, |
|
"logps/real": -227.56826782226562, |
|
"loss": 0.1107, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.2301466464996338, |
|
"rewards/margins": 3.3704135417938232, |
|
"rewards/real": 2.1402671337127686, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.837677725118483e-07, |
|
"logits/generated": 6.485627174377441, |
|
"logits/real": 4.733918190002441, |
|
"logps/generated": -306.0613708496094, |
|
"logps/real": -236.187744140625, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.2733852863311768, |
|
"rewards/margins": 3.3273322582244873, |
|
"rewards/real": 2.0539469718933105, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.825829383886256e-07, |
|
"logits/generated": 6.625826358795166, |
|
"logits/real": 4.502466201782227, |
|
"logps/generated": -302.5285339355469, |
|
"logps/real": -191.37582397460938, |
|
"loss": 0.126, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.4649912118911743, |
|
"rewards/margins": 3.6144001483917236, |
|
"rewards/real": 2.149409532546997, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.813981042654028e-07, |
|
"logits/generated": 6.492133140563965, |
|
"logits/real": 4.759311199188232, |
|
"logps/generated": -302.1866149902344, |
|
"logps/real": -230.4091796875, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.367823600769043, |
|
"rewards/margins": 3.351057529449463, |
|
"rewards/real": 1.9832338094711304, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.802132701421801e-07, |
|
"logits/generated": 6.473480224609375, |
|
"logits/real": 4.9179205894470215, |
|
"logps/generated": -292.11358642578125, |
|
"logps/real": -235.0968017578125, |
|
"loss": 0.1294, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.268405795097351, |
|
"rewards/margins": 3.70072603225708, |
|
"rewards/real": 2.4323201179504395, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.7902843601895736e-07, |
|
"logits/generated": 6.590761661529541, |
|
"logits/real": 4.755660057067871, |
|
"logps/generated": -283.9560852050781, |
|
"logps/real": -239.81893920898438, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -1.3544660806655884, |
|
"rewards/margins": 3.5569427013397217, |
|
"rewards/real": 2.202476978302002, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.778436018957346e-07, |
|
"logits/generated": 6.543065071105957, |
|
"logits/real": 4.660527229309082, |
|
"logps/generated": -293.5489501953125, |
|
"logps/real": -231.8582305908203, |
|
"loss": 0.1171, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.3222100734710693, |
|
"rewards/margins": 3.1835999488830566, |
|
"rewards/real": 1.8613903522491455, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.766587677725118e-07, |
|
"logits/generated": 6.483323097229004, |
|
"logits/real": 4.6694488525390625, |
|
"logps/generated": -292.2344665527344, |
|
"logps/real": -229.16067504882812, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.535994052886963, |
|
"rewards/margins": 3.5554702281951904, |
|
"rewards/real": 2.0194761753082275, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7547393364928907e-07, |
|
"logits/generated": 6.537093162536621, |
|
"logits/real": 4.834275722503662, |
|
"logps/generated": -283.90765380859375, |
|
"logps/real": -221.65771484375, |
|
"loss": 0.1043, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.336133599281311, |
|
"rewards/margins": 3.4120376110076904, |
|
"rewards/real": 2.075904369354248, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7428909952606634e-07, |
|
"logits/generated": 6.584852695465088, |
|
"logits/real": 4.697403907775879, |
|
"logps/generated": -293.49371337890625, |
|
"logps/real": -230.45291137695312, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.4560847282409668, |
|
"rewards/margins": 3.7806944847106934, |
|
"rewards/real": 2.3246102333068848, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7310426540284356e-07, |
|
"logits/generated": 6.4898881912231445, |
|
"logits/real": 4.787100791931152, |
|
"logps/generated": -289.96502685546875, |
|
"logps/real": -235.83511352539062, |
|
"loss": 0.1452, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -1.4755102396011353, |
|
"rewards/margins": 3.804914951324463, |
|
"rewards/real": 2.329404592514038, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7191943127962083e-07, |
|
"logits/generated": 6.573674201965332, |
|
"logits/real": 4.813099384307861, |
|
"logps/generated": -295.05950927734375, |
|
"logps/real": -239.4409942626953, |
|
"loss": 0.1134, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.601508378982544, |
|
"rewards/margins": 3.6218199729919434, |
|
"rewards/real": 2.0203115940093994, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.707345971563981e-07, |
|
"logits/generated": 6.518675804138184, |
|
"logits/real": 4.694587707519531, |
|
"logps/generated": -288.26116943359375, |
|
"logps/real": -216.22702026367188, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.3888970613479614, |
|
"rewards/margins": 3.527122974395752, |
|
"rewards/real": 2.1382253170013428, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.695497630331754e-07, |
|
"logits/generated": 6.476728916168213, |
|
"logits/real": 4.867232322692871, |
|
"logps/generated": -290.8531494140625, |
|
"logps/real": -223.3974151611328, |
|
"loss": 0.1379, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.48335599899292, |
|
"rewards/margins": 3.3799889087677, |
|
"rewards/real": 1.8966329097747803, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.683649289099526e-07, |
|
"logits/generated": 6.590670585632324, |
|
"logits/real": 4.584352493286133, |
|
"logps/generated": -280.3003845214844, |
|
"logps/real": -222.9787139892578, |
|
"loss": 0.1055, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.518668532371521, |
|
"rewards/margins": 3.503495454788208, |
|
"rewards/real": 1.9848268032073975, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.671800947867298e-07, |
|
"logits/generated": 6.4740777015686035, |
|
"logits/real": 4.666982173919678, |
|
"logps/generated": -293.92950439453125, |
|
"logps/real": -241.7804718017578, |
|
"loss": 0.0979, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.4945917129516602, |
|
"rewards/margins": 3.7217907905578613, |
|
"rewards/real": 2.227198839187622, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.659952606635071e-07, |
|
"logits/generated": 6.5625457763671875, |
|
"logits/real": 4.97821044921875, |
|
"logps/generated": -287.527099609375, |
|
"logps/real": -244.9476776123047, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -1.5237135887145996, |
|
"rewards/margins": 3.7011260986328125, |
|
"rewards/real": 2.177412509918213, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.648104265402843e-07, |
|
"logits/generated": 6.531602382659912, |
|
"logits/real": 5.296113014221191, |
|
"logps/generated": -283.7915954589844, |
|
"logps/real": -241.6884765625, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -1.42214834690094, |
|
"rewards/margins": 3.4255123138427734, |
|
"rewards/real": 2.003364086151123, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.636255924170616e-07, |
|
"logits/generated": 6.558846473693848, |
|
"logits/real": 4.663855075836182, |
|
"logps/generated": -295.4413757324219, |
|
"logps/real": -231.3755340576172, |
|
"loss": 0.1392, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/generated": -1.4816911220550537, |
|
"rewards/margins": 3.7079098224639893, |
|
"rewards/real": 2.2262187004089355, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6244075829383886e-07, |
|
"logits/generated": 6.471614837646484, |
|
"logits/real": 4.710072994232178, |
|
"logps/generated": -288.50982666015625, |
|
"logps/real": -242.4681854248047, |
|
"loss": 0.133, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.2834060192108154, |
|
"rewards/margins": 3.245429277420044, |
|
"rewards/real": 1.962023377418518, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6125592417061613e-07, |
|
"logits/generated": 6.597050666809082, |
|
"logits/real": 4.699416160583496, |
|
"logps/generated": -309.9823303222656, |
|
"logps/real": -241.0970001220703, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.800374984741211, |
|
"rewards/margins": 3.8445522785186768, |
|
"rewards/real": 2.044177532196045, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.6007109004739335e-07, |
|
"logits/generated": 6.396651744842529, |
|
"logits/real": 4.873335838317871, |
|
"logps/generated": -302.5182800292969, |
|
"logps/real": -225.38162231445312, |
|
"loss": 0.096, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.7537730932235718, |
|
"rewards/margins": 3.5806782245635986, |
|
"rewards/real": 1.8269050121307373, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.588862559241706e-07, |
|
"logits/generated": 6.512624263763428, |
|
"logits/real": 4.873417854309082, |
|
"logps/generated": -310.1457824707031, |
|
"logps/real": -221.52127075195312, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.995270013809204, |
|
"rewards/margins": 3.832683563232422, |
|
"rewards/real": 1.8374135494232178, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5770142180094784e-07, |
|
"logits/generated": 6.3861165046691895, |
|
"logits/real": 4.902550220489502, |
|
"logps/generated": -309.2914123535156, |
|
"logps/real": -254.0144805908203, |
|
"loss": 0.0882, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.0285847187042236, |
|
"rewards/margins": 4.352484703063965, |
|
"rewards/real": 2.323899745941162, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5651658767772506e-07, |
|
"logits/generated": 6.3969197273254395, |
|
"logits/real": 4.497953414916992, |
|
"logps/generated": -282.2160949707031, |
|
"logps/real": -221.9111328125, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.500212550163269, |
|
"rewards/margins": 3.4251670837402344, |
|
"rewards/real": 1.9249544143676758, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5533175355450234e-07, |
|
"logits/generated": 6.489283084869385, |
|
"logits/real": 5.103555202484131, |
|
"logps/generated": -296.80084228515625, |
|
"logps/real": -254.9130859375, |
|
"loss": 0.1188, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.7659108638763428, |
|
"rewards/margins": 3.7144954204559326, |
|
"rewards/real": 1.948584794998169, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.541469194312796e-07, |
|
"logits/generated": 6.5424089431762695, |
|
"logits/real": 4.5776543617248535, |
|
"logps/generated": -306.41778564453125, |
|
"logps/real": -229.9923553466797, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.5843690633773804, |
|
"rewards/margins": 3.7243666648864746, |
|
"rewards/real": 2.139997959136963, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.529620853080569e-07, |
|
"logits/generated": 6.53662109375, |
|
"logits/real": 4.515005588531494, |
|
"logps/generated": -281.1587829589844, |
|
"logps/real": -198.87599182128906, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -1.793796181678772, |
|
"rewards/margins": 3.6370015144348145, |
|
"rewards/real": 1.8432050943374634, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.517772511848341e-07, |
|
"logits/generated": 6.402149200439453, |
|
"logits/real": 5.097185134887695, |
|
"logps/generated": -300.73248291015625, |
|
"logps/real": -269.5830078125, |
|
"loss": 0.0861, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.8012573719024658, |
|
"rewards/margins": 4.034179210662842, |
|
"rewards/real": 2.232922077178955, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.505924170616114e-07, |
|
"logits/generated": 6.531800746917725, |
|
"logits/real": 4.443795204162598, |
|
"logps/generated": -297.3340759277344, |
|
"logps/real": -225.61459350585938, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.9287163019180298, |
|
"rewards/margins": 4.080899715423584, |
|
"rewards/real": 2.1521835327148438, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4940758293838865e-07, |
|
"logits/generated": 6.565011501312256, |
|
"logits/real": 4.78118371963501, |
|
"logps/generated": -300.0439453125, |
|
"logps/real": -232.56979370117188, |
|
"loss": 0.0998, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.7687839269638062, |
|
"rewards/margins": 3.690317153930664, |
|
"rewards/real": 1.921533226966858, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.482227488151658e-07, |
|
"logits/generated": 6.521812438964844, |
|
"logits/real": 4.503942489624023, |
|
"logps/generated": -304.3191223144531, |
|
"logps/real": -216.3659210205078, |
|
"loss": 0.105, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.9444500207901, |
|
"rewards/margins": 4.122984886169434, |
|
"rewards/real": 2.178535223007202, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.470379146919431e-07, |
|
"logits/generated": 6.601067543029785, |
|
"logits/real": 4.580229759216309, |
|
"logps/generated": -304.520751953125, |
|
"logps/real": -218.858154296875, |
|
"loss": 0.1114, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -1.790876030921936, |
|
"rewards/margins": 3.8865456581115723, |
|
"rewards/real": 2.0956692695617676, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4585308056872036e-07, |
|
"logits/generated": 6.4673662185668945, |
|
"logits/real": 4.512519836425781, |
|
"logps/generated": -309.33563232421875, |
|
"logps/real": -237.4860076904297, |
|
"loss": 0.0852, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.074862003326416, |
|
"rewards/margins": 4.4387288093566895, |
|
"rewards/real": 2.3638668060302734, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4466824644549763e-07, |
|
"logits/generated": 6.52877140045166, |
|
"logits/real": 4.660586833953857, |
|
"logps/generated": -292.84735107421875, |
|
"logps/real": -235.4850616455078, |
|
"loss": 0.0748, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.910387635231018, |
|
"rewards/margins": 4.243655204772949, |
|
"rewards/real": 2.3332676887512207, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.4348341232227485e-07, |
|
"logits/generated": 6.508902549743652, |
|
"logits/real": 5.103868007659912, |
|
"logps/generated": -309.3510437011719, |
|
"logps/real": -261.96673583984375, |
|
"loss": 0.1137, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.9301677942276, |
|
"rewards/margins": 3.7662911415100098, |
|
"rewards/real": 1.8361234664916992, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.422985781990521e-07, |
|
"logits/generated": 6.578322410583496, |
|
"logits/real": 4.619819641113281, |
|
"logps/generated": -309.3769836425781, |
|
"logps/real": -230.23489379882812, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.1213955879211426, |
|
"rewards/margins": 4.048805236816406, |
|
"rewards/real": 1.9274095296859741, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.411137440758294e-07, |
|
"logits/generated": 6.638535499572754, |
|
"logits/real": 4.866148948669434, |
|
"logps/generated": -297.40826416015625, |
|
"logps/real": -226.35610961914062, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.079007625579834, |
|
"rewards/margins": 4.372213840484619, |
|
"rewards/real": 2.293206214904785, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.3992890995260667e-07, |
|
"logits/generated": 6.590002536773682, |
|
"logits/real": 4.547215461730957, |
|
"logps/generated": -288.596435546875, |
|
"logps/real": -198.56236267089844, |
|
"loss": 0.0912, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.8445419073104858, |
|
"rewards/margins": 3.7398104667663574, |
|
"rewards/real": 1.8952690362930298, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3874407582938384e-07, |
|
"logits/generated": 6.5464019775390625, |
|
"logits/real": 4.549433708190918, |
|
"logps/generated": -316.94287109375, |
|
"logps/real": -230.5665283203125, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.1460461616516113, |
|
"rewards/margins": 4.4059224128723145, |
|
"rewards/real": 2.259875774383545, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375592417061611e-07, |
|
"logits/generated": 6.43328332901001, |
|
"logits/real": 4.823711395263672, |
|
"logps/generated": -301.39202880859375, |
|
"logps/real": -236.8686065673828, |
|
"loss": 0.0918, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.8650691509246826, |
|
"rewards/margins": 4.1491899490356445, |
|
"rewards/real": 2.284120559692383, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.363744075829384e-07, |
|
"logits/generated": 6.538866996765137, |
|
"logits/real": 4.6786723136901855, |
|
"logps/generated": -291.01483154296875, |
|
"logps/real": -196.77285766601562, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.9935919046401978, |
|
"rewards/margins": 4.511776447296143, |
|
"rewards/real": 2.518184185028076, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.351895734597156e-07, |
|
"logits/generated": 6.427986145019531, |
|
"logits/real": 4.777734279632568, |
|
"logps/generated": -291.7196350097656, |
|
"logps/real": -219.09597778320312, |
|
"loss": 0.1, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.9151489734649658, |
|
"rewards/margins": 3.943358898162842, |
|
"rewards/real": 2.028210163116455, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.340047393364929e-07, |
|
"logits/generated": 6.557374477386475, |
|
"logits/real": 4.517666339874268, |
|
"logps/generated": -293.89178466796875, |
|
"logps/real": -231.68222045898438, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.155890703201294, |
|
"rewards/margins": 4.3398518562316895, |
|
"rewards/real": 2.1839611530303955, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.3281990521327015e-07, |
|
"logits/generated": 6.494599342346191, |
|
"logits/real": 4.4522833824157715, |
|
"logps/generated": -309.76556396484375, |
|
"logps/real": -219.76806640625, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.3567099571228027, |
|
"rewards/margins": 4.560822486877441, |
|
"rewards/real": 2.2041122913360596, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.316350710900474e-07, |
|
"logits/generated": 6.508890628814697, |
|
"logits/real": 4.725305080413818, |
|
"logps/generated": -294.90533447265625, |
|
"logps/real": -225.0491180419922, |
|
"loss": 0.1116, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -1.9600846767425537, |
|
"rewards/margins": 3.896559476852417, |
|
"rewards/real": 1.9364748001098633, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.304502369668246e-07, |
|
"logits/generated": 6.483962059020996, |
|
"logits/real": 4.7496185302734375, |
|
"logps/generated": -294.6922607421875, |
|
"logps/real": -234.5235137939453, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.0787155628204346, |
|
"rewards/margins": 4.212123394012451, |
|
"rewards/real": 2.1334080696105957, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2926540284360186e-07, |
|
"logits/generated": 6.553183078765869, |
|
"logits/real": 4.5973711013793945, |
|
"logps/generated": -290.69122314453125, |
|
"logps/real": -224.2595977783203, |
|
"loss": 0.0786, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.1069176197052, |
|
"rewards/margins": 4.262394905090332, |
|
"rewards/real": 2.155477523803711, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2808056872037913e-07, |
|
"logits/generated": 6.475919246673584, |
|
"logits/real": 4.781458854675293, |
|
"logps/generated": -293.8143615722656, |
|
"logps/real": -242.02157592773438, |
|
"loss": 0.0815, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.0358715057373047, |
|
"rewards/margins": 4.052926063537598, |
|
"rewards/real": 2.017054796218872, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.2689573459715635e-07, |
|
"logits/generated": 6.587837219238281, |
|
"logits/real": 4.240110874176025, |
|
"logps/generated": -300.80084228515625, |
|
"logps/real": -197.26913452148438, |
|
"loss": 0.0862, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.1164047718048096, |
|
"rewards/margins": 4.440495014190674, |
|
"rewards/real": 2.3240902423858643, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2571090047393363e-07, |
|
"logits/generated": 6.373067855834961, |
|
"logits/real": 4.968833923339844, |
|
"logps/generated": -313.5643005371094, |
|
"logps/real": -238.37448120117188, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.1593971252441406, |
|
"rewards/margins": 4.199219703674316, |
|
"rewards/real": 2.0398221015930176, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.245260663507109e-07, |
|
"logits/generated": 6.488961219787598, |
|
"logits/real": 4.785304069519043, |
|
"logps/generated": -305.1136474609375, |
|
"logps/real": -226.7467498779297, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.0184876918792725, |
|
"rewards/margins": 4.260189056396484, |
|
"rewards/real": 2.241701126098633, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2334123222748817e-07, |
|
"logits/generated": 6.571511745452881, |
|
"logits/real": 4.719809055328369, |
|
"logps/generated": -288.8949279785156, |
|
"logps/real": -227.11788940429688, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.8733266592025757, |
|
"rewards/margins": 4.297513484954834, |
|
"rewards/real": 2.424187183380127, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.221563981042654e-07, |
|
"logits/generated": 6.46596622467041, |
|
"logits/real": 5.228185653686523, |
|
"logps/generated": -317.7358703613281, |
|
"logps/real": -263.435791015625, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.4127206802368164, |
|
"rewards/margins": 4.111409664154053, |
|
"rewards/real": 1.6986888647079468, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.209715639810426e-07, |
|
"logits/generated": 6.481564998626709, |
|
"logits/real": 4.614343166351318, |
|
"logps/generated": -310.3409729003906, |
|
"logps/real": -232.2714385986328, |
|
"loss": 0.0894, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/generated": -2.1927568912506104, |
|
"rewards/margins": 4.123000144958496, |
|
"rewards/real": 1.930242896080017, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.197867298578199e-07, |
|
"logits/generated": 6.458526611328125, |
|
"logits/real": 4.698050498962402, |
|
"logps/generated": -300.57391357421875, |
|
"logps/real": -227.27169799804688, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.231247901916504, |
|
"rewards/margins": 4.3048787117004395, |
|
"rewards/real": 2.0736308097839355, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.186018957345971e-07, |
|
"logits/generated": 6.454297065734863, |
|
"logits/real": 4.6443586349487305, |
|
"logps/generated": -299.6277160644531, |
|
"logps/real": -228.95370483398438, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.205613613128662, |
|
"rewards/margins": 4.489793300628662, |
|
"rewards/real": 2.2841796875, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.174170616113744e-07, |
|
"logits/generated": 6.567930698394775, |
|
"logits/real": 4.493962287902832, |
|
"logps/generated": -296.0806579589844, |
|
"logps/real": -206.06979370117188, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.3037962913513184, |
|
"rewards/margins": 4.68963623046875, |
|
"rewards/real": 2.3858392238616943, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.1623222748815165e-07, |
|
"logits/generated": 6.372540473937988, |
|
"logits/real": 4.720137596130371, |
|
"logps/generated": -283.2089538574219, |
|
"logps/real": -228.1040802001953, |
|
"loss": 0.0764, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.127713918685913, |
|
"rewards/margins": 4.436669826507568, |
|
"rewards/real": 2.3089561462402344, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.150473933649289e-07, |
|
"logits/generated": 6.5518317222595215, |
|
"logits/real": 4.7907562255859375, |
|
"logps/generated": -305.01019287109375, |
|
"logps/real": -233.3303985595703, |
|
"loss": 0.0768, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.040151834487915, |
|
"rewards/margins": 4.2342705726623535, |
|
"rewards/real": 2.1941189765930176, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1386255924170614e-07, |
|
"logits/generated": 6.603424072265625, |
|
"logits/real": 4.3683857917785645, |
|
"logps/generated": -288.2603759765625, |
|
"logps/real": -198.78309631347656, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.926896333694458, |
|
"rewards/margins": 4.150208950042725, |
|
"rewards/real": 2.2233126163482666, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.126777251184834e-07, |
|
"logits/generated": 6.489920616149902, |
|
"logits/real": 4.773646831512451, |
|
"logps/generated": -312.36236572265625, |
|
"logps/real": -234.63076782226562, |
|
"loss": 0.062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.2082715034484863, |
|
"rewards/margins": 4.475545883178711, |
|
"rewards/real": 2.2672739028930664, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.1149289099526064e-07, |
|
"logits/generated": 6.615997314453125, |
|
"logits/real": 4.535744667053223, |
|
"logps/generated": -283.09326171875, |
|
"logps/real": -217.5418701171875, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -2.2303497791290283, |
|
"rewards/margins": 4.6097259521484375, |
|
"rewards/real": 2.3793764114379883, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.103080568720379e-07, |
|
"logits/generated": 6.409584045410156, |
|
"logits/real": 4.447390556335449, |
|
"logps/generated": -290.5548095703125, |
|
"logps/real": -227.99038696289062, |
|
"loss": 0.0963, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.0898966789245605, |
|
"rewards/margins": 4.26753568649292, |
|
"rewards/real": 2.1776394844055176, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0912322274881513e-07, |
|
"logits/generated": 6.556498050689697, |
|
"logits/real": 4.974642753601074, |
|
"logps/generated": -300.2599182128906, |
|
"logps/real": -241.9363555908203, |
|
"loss": 0.079, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.5023233890533447, |
|
"rewards/margins": 4.799649238586426, |
|
"rewards/real": 2.297325372695923, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.079383886255924e-07, |
|
"logits/generated": 6.597992897033691, |
|
"logits/real": 4.567930698394775, |
|
"logps/generated": -311.3286437988281, |
|
"logps/real": -219.1073455810547, |
|
"loss": 0.06, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.2045724391937256, |
|
"rewards/margins": 4.53138542175293, |
|
"rewards/real": 2.326812267303467, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.067535545023697e-07, |
|
"logits/generated": 6.559409141540527, |
|
"logits/real": 4.554577827453613, |
|
"logps/generated": -303.6340637207031, |
|
"logps/real": -208.86483764648438, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.4833426475524902, |
|
"rewards/margins": 4.425951957702637, |
|
"rewards/real": 1.9426090717315674, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.055687203791469e-07, |
|
"logits/generated": 6.533651828765869, |
|
"logits/real": 4.764289379119873, |
|
"logps/generated": -295.6280822753906, |
|
"logps/real": -224.78646850585938, |
|
"loss": 0.0814, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.3707690238952637, |
|
"rewards/margins": 4.8221025466918945, |
|
"rewards/real": 2.451333522796631, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0438388625592417e-07, |
|
"logits/generated": 6.513457298278809, |
|
"logits/real": 4.793578147888184, |
|
"logps/generated": -311.8963928222656, |
|
"logps/real": -224.7337646484375, |
|
"loss": 0.0821, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.5289576053619385, |
|
"rewards/margins": 4.804759502410889, |
|
"rewards/real": 2.2758023738861084, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.0319905213270144e-07, |
|
"logits/generated": 6.621163368225098, |
|
"logits/real": 4.561090469360352, |
|
"logps/generated": -303.7142028808594, |
|
"logps/real": -222.3753662109375, |
|
"loss": 0.0836, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.326003074645996, |
|
"rewards/margins": 4.459982872009277, |
|
"rewards/real": 2.1339797973632812, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0201421800947866e-07, |
|
"logits/generated": 6.580770969390869, |
|
"logits/real": 4.773768901824951, |
|
"logps/generated": -311.6327209472656, |
|
"logps/real": -249.0811309814453, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.3817009925842285, |
|
"rewards/margins": 4.315882682800293, |
|
"rewards/real": 1.934181809425354, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.008293838862559e-07, |
|
"logits/generated": 6.521528720855713, |
|
"logits/real": 4.7063798904418945, |
|
"logps/generated": -294.9180908203125, |
|
"logps/real": -223.62643432617188, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.208840847015381, |
|
"rewards/margins": 4.619427680969238, |
|
"rewards/real": 2.4105873107910156, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9964454976303315e-07, |
|
"logits/generated": 6.510763645172119, |
|
"logits/real": 4.770654201507568, |
|
"logps/generated": -306.3185729980469, |
|
"logps/real": -223.23654174804688, |
|
"loss": 0.057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.2273476123809814, |
|
"rewards/margins": 4.425220966339111, |
|
"rewards/real": 2.197873592376709, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.984597156398104e-07, |
|
"logits/generated": 6.569235801696777, |
|
"logits/real": 4.330723762512207, |
|
"logps/generated": -300.669677734375, |
|
"logps/real": -213.9007568359375, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.5640532970428467, |
|
"rewards/margins": 4.792849540710449, |
|
"rewards/real": 2.2287967205047607, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.9727488151658765e-07, |
|
"logits/generated": 6.663917541503906, |
|
"logits/real": 4.72244119644165, |
|
"logps/generated": -314.7903747558594, |
|
"logps/real": -252.47933959960938, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.60965895652771, |
|
"rewards/margins": 4.690103054046631, |
|
"rewards/real": 2.080443859100342, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.960900473933649e-07, |
|
"logits/generated": 6.465506553649902, |
|
"logits/real": 5.010118007659912, |
|
"logps/generated": -297.9461975097656, |
|
"logps/real": -224.8966522216797, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.0201261043548584, |
|
"rewards/margins": 4.045393943786621, |
|
"rewards/real": 2.025268077850342, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.949052132701422e-07, |
|
"logits/generated": 6.5695037841796875, |
|
"logits/real": 4.864360332489014, |
|
"logps/generated": -300.3662414550781, |
|
"logps/real": -243.6785888671875, |
|
"loss": 0.0647, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.213184118270874, |
|
"rewards/margins": 4.579488754272461, |
|
"rewards/real": 2.366304397583008, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.9372037914691946e-07, |
|
"logits/generated": 6.518437385559082, |
|
"logits/real": 4.748892784118652, |
|
"logps/generated": -309.49658203125, |
|
"logps/real": -216.10751342773438, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.4229207038879395, |
|
"rewards/margins": 4.442441463470459, |
|
"rewards/real": 2.0195205211639404, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.9253554502369663e-07, |
|
"logits/generated": 6.526637077331543, |
|
"logits/real": 4.67498779296875, |
|
"logps/generated": -313.6875, |
|
"logps/real": -231.32199096679688, |
|
"loss": 0.0762, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.6978516578674316, |
|
"rewards/margins": 4.7451090812683105, |
|
"rewards/real": 2.0472571849823, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.913507109004739e-07, |
|
"logits/generated": 6.5197906494140625, |
|
"logits/real": 4.786774635314941, |
|
"logps/generated": -300.3356628417969, |
|
"logps/real": -238.114501953125, |
|
"loss": 0.07, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.6963155269622803, |
|
"rewards/margins": 5.061102867126465, |
|
"rewards/real": 2.3647871017456055, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.901658767772512e-07, |
|
"logits/generated": 6.281010627746582, |
|
"logits/real": 4.850646495819092, |
|
"logps/generated": -311.21044921875, |
|
"logps/real": -224.34005737304688, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.614072561264038, |
|
"rewards/margins": 4.778448104858398, |
|
"rewards/real": 2.1643755435943604, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.889810426540284e-07, |
|
"logits/generated": 6.2719526290893555, |
|
"logits/real": 4.559032440185547, |
|
"logps/generated": -300.2083435058594, |
|
"logps/real": -220.322998046875, |
|
"loss": 0.0626, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.7301433086395264, |
|
"rewards/margins": 5.041817665100098, |
|
"rewards/real": 2.311674118041992, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779620853080567e-07, |
|
"logits/generated": 6.567207336425781, |
|
"logits/real": 4.634784698486328, |
|
"logps/generated": -306.6617736816406, |
|
"logps/real": -223.53744506835938, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.5135562419891357, |
|
"rewards/margins": 4.631071090698242, |
|
"rewards/real": 2.1175150871276855, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8661137440758294e-07, |
|
"logits/generated": 6.572225093841553, |
|
"logits/real": 5.128222465515137, |
|
"logps/generated": -305.1805419921875, |
|
"logps/real": -244.2073974609375, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -2.5788116455078125, |
|
"rewards/margins": 4.599055767059326, |
|
"rewards/real": 2.0202441215515137, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.854265402843602e-07, |
|
"logits/generated": 6.486606597900391, |
|
"logits/real": 4.381880283355713, |
|
"logps/generated": -299.26226806640625, |
|
"logps/real": -204.21128845214844, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.690892219543457, |
|
"rewards/margins": 4.859349727630615, |
|
"rewards/real": 2.168457508087158, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.842417061611374e-07, |
|
"logits/generated": 6.467171669006348, |
|
"logits/real": 4.881519317626953, |
|
"logps/generated": -316.6208801269531, |
|
"logps/real": -249.66964721679688, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9214892387390137, |
|
"rewards/margins": 4.928795337677002, |
|
"rewards/real": 2.0073063373565674, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8305687203791465e-07, |
|
"logits/generated": 6.373992443084717, |
|
"logits/real": 4.881070613861084, |
|
"logps/generated": -304.5431823730469, |
|
"logps/real": -238.75918579101562, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.7934460639953613, |
|
"rewards/margins": 5.073416709899902, |
|
"rewards/real": 2.279970645904541, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8187203791469193e-07, |
|
"logits/generated": 6.459514617919922, |
|
"logits/real": 4.823994159698486, |
|
"logps/generated": -297.67156982421875, |
|
"logps/real": -244.6254425048828, |
|
"loss": 0.07, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.617419958114624, |
|
"rewards/margins": 4.747994422912598, |
|
"rewards/real": 2.1305739879608154, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.806872037914692e-07, |
|
"logits/generated": 6.545841217041016, |
|
"logits/real": 4.525175094604492, |
|
"logps/generated": -300.8380126953125, |
|
"logps/real": -230.91641235351562, |
|
"loss": 0.0413, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.621128797531128, |
|
"rewards/margins": 4.8851799964904785, |
|
"rewards/real": 2.2640514373779297, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.795023696682464e-07, |
|
"logits/generated": 6.552148342132568, |
|
"logits/real": 4.585801601409912, |
|
"logps/generated": -291.0857238769531, |
|
"logps/real": -203.3738250732422, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.501784324645996, |
|
"rewards/margins": 4.606094837188721, |
|
"rewards/real": 2.1043105125427246, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.783175355450237e-07, |
|
"logits/generated": 6.5855712890625, |
|
"logits/real": 4.8127546310424805, |
|
"logps/generated": -304.4456787109375, |
|
"logps/real": -241.84164428710938, |
|
"loss": 0.0662, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.4264376163482666, |
|
"rewards/margins": 4.380554676055908, |
|
"rewards/real": 1.9541168212890625, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7713270142180097e-07, |
|
"logits/generated": 6.61553955078125, |
|
"logits/real": 4.842226982116699, |
|
"logps/generated": -304.24908447265625, |
|
"logps/real": -228.1985321044922, |
|
"loss": 0.0723, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.668332815170288, |
|
"rewards/margins": 4.606504917144775, |
|
"rewards/real": 1.9381721019744873, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.759478672985782e-07, |
|
"logits/generated": 6.651033878326416, |
|
"logits/real": 5.216360569000244, |
|
"logps/generated": -299.9844665527344, |
|
"logps/real": -245.30838012695312, |
|
"loss": 0.058, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.7325878143310547, |
|
"rewards/margins": 4.621773719787598, |
|
"rewards/real": 1.8891855478286743, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.747630331753554e-07, |
|
"logits/generated": 6.4999237060546875, |
|
"logits/real": 4.944417476654053, |
|
"logps/generated": -296.3262023925781, |
|
"logps/real": -230.99472045898438, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.5117669105529785, |
|
"rewards/margins": 4.533594131469727, |
|
"rewards/real": 2.021827220916748, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.735781990521327e-07, |
|
"logits/generated": 6.586300849914551, |
|
"logits/real": 4.376750946044922, |
|
"logps/generated": -308.04730224609375, |
|
"logps/real": -199.63783264160156, |
|
"loss": 0.0526, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.739675521850586, |
|
"rewards/margins": 5.009511947631836, |
|
"rewards/real": 2.26983642578125, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7239336492890995e-07, |
|
"logits/generated": 6.62445592880249, |
|
"logits/real": 4.475614547729492, |
|
"logps/generated": -299.97686767578125, |
|
"logps/real": -205.2351837158203, |
|
"loss": 0.0631, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.6585028171539307, |
|
"rewards/margins": 4.911370277404785, |
|
"rewards/real": 2.2528672218322754, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7120853080568717e-07, |
|
"logits/generated": 6.5361738204956055, |
|
"logits/real": 4.921416282653809, |
|
"logps/generated": -320.28619384765625, |
|
"logps/real": -235.4938507080078, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.5308923721313477, |
|
"rewards/margins": 4.577965259552002, |
|
"rewards/real": 2.047072649002075, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.7002369668246444e-07, |
|
"logits/generated": 6.583050727844238, |
|
"logits/real": 4.50087833404541, |
|
"logps/generated": -305.26544189453125, |
|
"logps/real": -205.6936492919922, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.650360584259033, |
|
"rewards/margins": 4.845761299133301, |
|
"rewards/real": 2.195401191711426, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.688388625592417e-07, |
|
"logits/generated": 6.528054714202881, |
|
"logits/real": 4.9675092697143555, |
|
"logps/generated": -287.01385498046875, |
|
"logps/real": -239.06137084960938, |
|
"loss": 0.0695, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.433396577835083, |
|
"rewards/margins": 4.63083553314209, |
|
"rewards/real": 2.1974387168884277, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.6765402843601894e-07, |
|
"logits/generated": 6.572503089904785, |
|
"logits/real": 4.664327621459961, |
|
"logps/generated": -299.9050598144531, |
|
"logps/real": -226.3813934326172, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.615846633911133, |
|
"rewards/margins": 4.903604030609131, |
|
"rewards/real": 2.287757396697998, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.664691943127962e-07, |
|
"logits/generated": 6.500199794769287, |
|
"logits/real": 4.409898281097412, |
|
"logps/generated": -291.48956298828125, |
|
"logps/real": -221.5597686767578, |
|
"loss": 0.0677, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.5167105197906494, |
|
"rewards/margins": 4.5962815284729, |
|
"rewards/real": 2.079570770263672, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6528436018957343e-07, |
|
"logits/generated": 6.451045036315918, |
|
"logits/real": 4.783955097198486, |
|
"logps/generated": -297.8843688964844, |
|
"logps/real": -234.1428985595703, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9517464637756348, |
|
"rewards/margins": 5.131245136260986, |
|
"rewards/real": 2.1794984340667725, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.640995260663507e-07, |
|
"logits/generated": 6.4251837730407715, |
|
"logits/real": 4.974338054656982, |
|
"logps/generated": -286.2950134277344, |
|
"logps/real": -229.6946258544922, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.7505643367767334, |
|
"rewards/margins": 4.802645683288574, |
|
"rewards/real": 2.05208158493042, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629146919431279e-07, |
|
"logits/generated": 6.499301910400391, |
|
"logits/real": 4.888270378112793, |
|
"logps/generated": -313.7422180175781, |
|
"logps/real": -248.39376831054688, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.546126127243042, |
|
"rewards/margins": 4.463229179382324, |
|
"rewards/real": 1.9171028137207031, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.617298578199052e-07, |
|
"logits/generated": 6.561995506286621, |
|
"logits/real": 4.65811014175415, |
|
"logps/generated": -311.327392578125, |
|
"logps/real": -216.3298797607422, |
|
"loss": 0.057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.7726848125457764, |
|
"rewards/margins": 4.696637153625488, |
|
"rewards/real": 1.923952341079712, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6054502369668247e-07, |
|
"logits/generated": 6.564155578613281, |
|
"logits/real": 4.5207109451293945, |
|
"logps/generated": -285.0248107910156, |
|
"logps/real": -203.79513549804688, |
|
"loss": 0.0668, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -2.5576417446136475, |
|
"rewards/margins": 4.535670280456543, |
|
"rewards/real": 1.978028655052185, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.5936018957345974e-07, |
|
"logits/generated": 6.515015602111816, |
|
"logits/real": 4.583956241607666, |
|
"logps/generated": -298.4903869628906, |
|
"logps/real": -232.15805053710938, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.533477544784546, |
|
"rewards/margins": 4.886750221252441, |
|
"rewards/real": 2.353271961212158, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5817535545023696e-07, |
|
"logits/generated": 6.423288822174072, |
|
"logits/real": 5.059484958648682, |
|
"logps/generated": -324.56365966796875, |
|
"logps/real": -248.84371948242188, |
|
"loss": 0.0654, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.873297691345215, |
|
"rewards/margins": 5.0330986976623535, |
|
"rewards/real": 2.1598012447357178, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.5699052132701423e-07, |
|
"logits/generated": 6.517401218414307, |
|
"logits/real": 5.171728134155273, |
|
"logps/generated": -307.1355285644531, |
|
"logps/real": -257.387451171875, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.787299633026123, |
|
"rewards/margins": 4.657351016998291, |
|
"rewards/real": 1.8700507879257202, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5580568720379145e-07, |
|
"logits/generated": 6.6013922691345215, |
|
"logits/real": 4.844089984893799, |
|
"logps/generated": -297.20379638671875, |
|
"logps/real": -221.24124145507812, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.8276898860931396, |
|
"rewards/margins": 4.962311744689941, |
|
"rewards/real": 2.1346216201782227, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5462085308056867e-07, |
|
"logits/generated": 6.696247100830078, |
|
"logits/real": 4.772692680358887, |
|
"logps/generated": -315.21832275390625, |
|
"logps/real": -226.9287567138672, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.7525386810302734, |
|
"rewards/margins": 4.851990222930908, |
|
"rewards/real": 2.0994513034820557, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.5343601895734595e-07, |
|
"logits/generated": 6.510929107666016, |
|
"logits/real": 4.709242343902588, |
|
"logps/generated": -306.0609436035156, |
|
"logps/real": -211.63021850585938, |
|
"loss": 0.0599, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.842668056488037, |
|
"rewards/margins": 4.8463568687438965, |
|
"rewards/real": 2.0036888122558594, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.522511848341232e-07, |
|
"logits/generated": 6.442812442779541, |
|
"logits/real": 4.947168827056885, |
|
"logps/generated": -322.0133056640625, |
|
"logps/real": -240.52841186523438, |
|
"loss": 0.0578, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.7832953929901123, |
|
"rewards/margins": 4.885197639465332, |
|
"rewards/real": 2.101902484893799, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.510663507109005e-07, |
|
"logits/generated": 6.514994144439697, |
|
"logits/real": 4.442912578582764, |
|
"logps/generated": -313.75628662109375, |
|
"logps/real": -222.9632568359375, |
|
"loss": 0.0421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.991903305053711, |
|
"rewards/margins": 5.337101936340332, |
|
"rewards/real": 2.3451991081237793, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.498815165876777e-07, |
|
"logits/generated": 6.605074405670166, |
|
"logits/real": 4.601679801940918, |
|
"logps/generated": -310.7535705566406, |
|
"logps/real": -230.5736541748047, |
|
"loss": 0.0501, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.7398934364318848, |
|
"rewards/margins": 5.229077339172363, |
|
"rewards/real": 2.4891839027404785, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.48696682464455e-07, |
|
"logits/generated": 6.653934478759766, |
|
"logits/real": 5.141819953918457, |
|
"logps/generated": -328.99005126953125, |
|
"logps/real": -241.0696563720703, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.0767884254455566, |
|
"rewards/margins": 5.069361686706543, |
|
"rewards/real": 1.9925737380981445, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.475118483412322e-07, |
|
"logits/generated": 6.622979164123535, |
|
"logits/real": 5.017902374267578, |
|
"logps/generated": -309.37762451171875, |
|
"logps/real": -230.4441375732422, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.974555253982544, |
|
"rewards/margins": 5.106658935546875, |
|
"rewards/real": 2.13210391998291, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.463270142180095e-07, |
|
"logits/generated": 6.475897312164307, |
|
"logits/real": 4.987759590148926, |
|
"logps/generated": -323.8994140625, |
|
"logps/real": -236.59725952148438, |
|
"loss": 0.0486, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.926840305328369, |
|
"rewards/margins": 5.029897212982178, |
|
"rewards/real": 2.1030571460723877, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.451421800947867e-07, |
|
"logits/generated": 6.513401985168457, |
|
"logits/real": 4.884097576141357, |
|
"logps/generated": -295.1630554199219, |
|
"logps/real": -219.2560272216797, |
|
"loss": 0.0509, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.579526662826538, |
|
"rewards/margins": 4.818698883056641, |
|
"rewards/real": 2.2391719818115234, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4395734597156397e-07, |
|
"logits/generated": 6.569561958312988, |
|
"logits/real": 4.7536301612854, |
|
"logps/generated": -314.96917724609375, |
|
"logps/real": -241.3905487060547, |
|
"loss": 0.0581, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9935827255249023, |
|
"rewards/margins": 5.2356743812561035, |
|
"rewards/real": 2.242091655731201, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.4277251184834124e-07, |
|
"logits/generated": 6.643240451812744, |
|
"logits/real": 4.672991752624512, |
|
"logps/generated": -298.296630859375, |
|
"logps/real": -218.02627563476562, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.751802921295166, |
|
"rewards/margins": 4.864652156829834, |
|
"rewards/real": 2.112849473953247, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4158767772511846e-07, |
|
"logits/generated": 6.543112277984619, |
|
"logits/real": 4.794711112976074, |
|
"logps/generated": -300.0271911621094, |
|
"logps/real": -229.9580535888672, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.9769158363342285, |
|
"rewards/margins": 5.159039497375488, |
|
"rewards/real": 2.182124137878418, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.4040284360189573e-07, |
|
"logits/generated": 6.426865577697754, |
|
"logits/real": 5.070995807647705, |
|
"logps/generated": -312.7875061035156, |
|
"logps/real": -244.02859497070312, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.957658529281616, |
|
"rewards/margins": 4.830506801605225, |
|
"rewards/real": 1.8728487491607666, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.39218009478673e-07, |
|
"logits/generated": 6.447142601013184, |
|
"logits/real": 4.88741397857666, |
|
"logps/generated": -316.395751953125, |
|
"logps/real": -238.98989868164062, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.3095619678497314, |
|
"rewards/margins": 5.356574535369873, |
|
"rewards/real": 2.0470128059387207, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3803317535545023e-07, |
|
"logits/generated": 6.639400482177734, |
|
"logits/real": 4.574619293212891, |
|
"logps/generated": -311.22705078125, |
|
"logps/real": -213.94375610351562, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.9241037368774414, |
|
"rewards/margins": 5.196986675262451, |
|
"rewards/real": 2.2728826999664307, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3684834123222747e-07, |
|
"logits/generated": 6.604647159576416, |
|
"logits/real": 4.500279426574707, |
|
"logps/generated": -297.166015625, |
|
"logps/real": -226.86227416992188, |
|
"loss": 0.0609, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.1100852489471436, |
|
"rewards/margins": 5.564393520355225, |
|
"rewards/real": 2.45430850982666, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3566350710900475e-07, |
|
"logits/generated": 6.377281665802002, |
|
"logits/real": 4.427682399749756, |
|
"logps/generated": -303.52667236328125, |
|
"logps/real": -228.33834838867188, |
|
"loss": 0.0493, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.9995968341827393, |
|
"rewards/margins": 5.4720869064331055, |
|
"rewards/real": 2.4724905490875244, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3447867298578197e-07, |
|
"logits/generated": 6.596705436706543, |
|
"logits/real": 4.664139747619629, |
|
"logps/generated": -299.83526611328125, |
|
"logps/real": -237.1983184814453, |
|
"loss": 0.0687, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.115382432937622, |
|
"rewards/margins": 5.5024261474609375, |
|
"rewards/real": 2.3870432376861572, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3329383886255924e-07, |
|
"logits/generated": 6.471650123596191, |
|
"logits/real": 4.98661470413208, |
|
"logps/generated": -323.77984619140625, |
|
"logps/real": -239.13720703125, |
|
"loss": 0.04, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5205624103546143, |
|
"rewards/margins": 6.0251383781433105, |
|
"rewards/real": 2.5045764446258545, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3210900473933649e-07, |
|
"logits/generated": 6.571569919586182, |
|
"logits/real": 5.169942855834961, |
|
"logps/generated": -310.2748107910156, |
|
"logps/real": -243.19113159179688, |
|
"loss": 0.0424, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.2248611450195312, |
|
"rewards/margins": 5.1551642417907715, |
|
"rewards/real": 1.9303032159805298, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.3092417061611373e-07, |
|
"logits/generated": 6.645476341247559, |
|
"logits/real": 4.323509693145752, |
|
"logps/generated": -305.4945373535156, |
|
"logps/real": -211.2071990966797, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.021256923675537, |
|
"rewards/margins": 5.193289756774902, |
|
"rewards/real": 2.1720330715179443, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2973933649289098e-07, |
|
"logits/generated": 6.557646751403809, |
|
"logits/real": 4.875040531158447, |
|
"logps/generated": -320.00830078125, |
|
"logps/real": -238.9925994873047, |
|
"loss": 0.042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.284480333328247, |
|
"rewards/margins": 5.7841997146606445, |
|
"rewards/real": 2.4997196197509766, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2855450236966822e-07, |
|
"logits/generated": 6.572206974029541, |
|
"logits/real": 4.644891262054443, |
|
"logps/generated": -314.3716735839844, |
|
"logps/real": -220.7062225341797, |
|
"loss": 0.0737, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.0081522464752197, |
|
"rewards/margins": 4.949704170227051, |
|
"rewards/real": 1.9415524005889893, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.273696682464455e-07, |
|
"logits/generated": 6.575781345367432, |
|
"logits/real": 4.972030162811279, |
|
"logps/generated": -304.5403137207031, |
|
"logps/real": -226.6322021484375, |
|
"loss": 0.0561, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.0886337757110596, |
|
"rewards/margins": 5.250014781951904, |
|
"rewards/real": 2.1613805294036865, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2618483412322272e-07, |
|
"logits/generated": 6.253432273864746, |
|
"logits/real": 4.807991981506348, |
|
"logps/generated": -308.74737548828125, |
|
"logps/real": -265.22088623046875, |
|
"loss": 0.058, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -3.068493366241455, |
|
"rewards/margins": 5.278921127319336, |
|
"rewards/real": 2.2104272842407227, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.25e-07, |
|
"logits/generated": 6.551450252532959, |
|
"logits/real": 5.10826301574707, |
|
"logps/generated": -307.3768615722656, |
|
"logps/real": -237.50149536132812, |
|
"loss": 0.0522, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9784510135650635, |
|
"rewards/margins": 4.849761962890625, |
|
"rewards/real": 1.8713109493255615, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2381516587677724e-07, |
|
"logits/generated": 6.531739234924316, |
|
"logits/real": 4.827146530151367, |
|
"logps/generated": -314.5278015136719, |
|
"logps/real": -241.0078887939453, |
|
"loss": 0.0595, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.0112459659576416, |
|
"rewards/margins": 4.8616437911987305, |
|
"rewards/real": 1.8503978252410889, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.226303317535545e-07, |
|
"logits/generated": 6.410187721252441, |
|
"logits/real": 5.148890495300293, |
|
"logps/generated": -312.85565185546875, |
|
"logps/real": -238.3211212158203, |
|
"loss": 0.0568, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.0788676738739014, |
|
"rewards/margins": 5.32081413269043, |
|
"rewards/real": 2.2419466972351074, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.2144549763033173e-07, |
|
"logits/generated": 6.559216499328613, |
|
"logits/real": 4.562496662139893, |
|
"logps/generated": -326.8006591796875, |
|
"logps/real": -229.6642608642578, |
|
"loss": 0.046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.402270793914795, |
|
"rewards/margins": 5.84303092956543, |
|
"rewards/real": 2.4407601356506348, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.20260663507109e-07, |
|
"logits/generated": 6.503948211669922, |
|
"logits/real": 4.351350784301758, |
|
"logps/generated": -308.74688720703125, |
|
"logps/real": -216.2238006591797, |
|
"loss": 0.0504, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.0520529747009277, |
|
"rewards/margins": 5.5729241371154785, |
|
"rewards/real": 2.5208706855773926, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1907582938388625e-07, |
|
"logits/generated": 6.486424922943115, |
|
"logits/real": 4.887758731842041, |
|
"logps/generated": -324.04327392578125, |
|
"logps/real": -245.8160400390625, |
|
"loss": 0.0484, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.8712515830993652, |
|
"rewards/margins": 5.168000221252441, |
|
"rewards/real": 2.296747922897339, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.178909952606635e-07, |
|
"logits/generated": 6.496403694152832, |
|
"logits/real": 4.948525428771973, |
|
"logps/generated": -313.8655090332031, |
|
"logps/real": -232.6029815673828, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.164641857147217, |
|
"rewards/margins": 5.175568103790283, |
|
"rewards/real": 2.0109262466430664, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1670616113744074e-07, |
|
"logits/generated": 6.357646465301514, |
|
"logits/real": 4.914302825927734, |
|
"logps/generated": -304.9514465332031, |
|
"logps/real": -231.0625, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.8249194622039795, |
|
"rewards/margins": 4.825296878814697, |
|
"rewards/real": 2.0003771781921387, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.15521327014218e-07, |
|
"logits/generated": 6.526192665100098, |
|
"logits/real": 4.569971084594727, |
|
"logps/generated": -310.04571533203125, |
|
"logps/real": -212.1841278076172, |
|
"loss": 0.0442, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.9459242820739746, |
|
"rewards/margins": 5.625715732574463, |
|
"rewards/real": 2.6797919273376465, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1433649289099526e-07, |
|
"logits/generated": 6.470966339111328, |
|
"logits/real": 4.952636241912842, |
|
"logps/generated": -298.78216552734375, |
|
"logps/real": -241.9241180419922, |
|
"loss": 0.0488, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.7446885108947754, |
|
"rewards/margins": 4.924825668334961, |
|
"rewards/real": 2.1801369190216064, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.131516587677725e-07, |
|
"logits/generated": 6.543872833251953, |
|
"logits/real": 5.083493232727051, |
|
"logps/generated": -328.01873779296875, |
|
"logps/real": -251.84909057617188, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1929991245269775, |
|
"rewards/margins": 5.793171405792236, |
|
"rewards/real": 2.6001715660095215, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1196682464454975e-07, |
|
"logits/generated": 6.523638725280762, |
|
"logits/real": 4.766221046447754, |
|
"logps/generated": -294.3127136230469, |
|
"logps/real": -227.9248809814453, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.653705596923828, |
|
"rewards/margins": 4.580477714538574, |
|
"rewards/real": 1.9267723560333252, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.10781990521327e-07, |
|
"logits/generated": 6.478121757507324, |
|
"logits/real": 5.104989528656006, |
|
"logps/generated": -296.796875, |
|
"logps/real": -241.1748809814453, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.8308489322662354, |
|
"rewards/margins": 4.990128517150879, |
|
"rewards/real": 2.1592795848846436, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0959715639810427e-07, |
|
"logits/generated": 6.648934841156006, |
|
"logits/real": 4.384207725524902, |
|
"logps/generated": -314.3799133300781, |
|
"logps/real": -215.88339233398438, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.3432087898254395, |
|
"rewards/margins": 5.64975118637085, |
|
"rewards/real": 2.306542158126831, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0841232227488152e-07, |
|
"logits/generated": 6.548549652099609, |
|
"logits/real": 4.6239542961120605, |
|
"logps/generated": -317.40057373046875, |
|
"logps/real": -234.81787109375, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6547443866729736, |
|
"rewards/margins": 5.791055202484131, |
|
"rewards/real": 2.1363110542297363, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0722748815165874e-07, |
|
"logits/generated": 6.573233604431152, |
|
"logits/real": 4.537630558013916, |
|
"logps/generated": -312.5951232910156, |
|
"logps/real": -227.45846557617188, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.44865083694458, |
|
"rewards/margins": 5.903794288635254, |
|
"rewards/real": 2.455143690109253, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.06042654028436e-07, |
|
"logits/generated": 6.5797014236450195, |
|
"logits/real": 4.640379428863525, |
|
"logps/generated": -309.2285461425781, |
|
"logps/real": -226.133056640625, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.0750410556793213, |
|
"rewards/margins": 5.1463847160339355, |
|
"rewards/real": 2.0713436603546143, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.0485781990521326e-07, |
|
"logits/generated": 6.535305023193359, |
|
"logits/real": 4.605700492858887, |
|
"logps/generated": -319.6959228515625, |
|
"logps/real": -225.58975219726562, |
|
"loss": 0.0426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.13779878616333, |
|
"rewards/margins": 5.010854721069336, |
|
"rewards/real": 1.8730554580688477, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0367298578199053e-07, |
|
"logits/generated": 6.437843322753906, |
|
"logits/real": 4.84013032913208, |
|
"logps/generated": -301.1745300292969, |
|
"logps/real": -227.8412322998047, |
|
"loss": 0.0418, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.978807210922241, |
|
"rewards/margins": 4.996744632720947, |
|
"rewards/real": 2.017937421798706, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0248815165876775e-07, |
|
"logits/generated": 6.470362186431885, |
|
"logits/real": 5.108733654022217, |
|
"logps/generated": -319.79840087890625, |
|
"logps/real": -237.103515625, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.0991902351379395, |
|
"rewards/margins": 5.182190418243408, |
|
"rewards/real": 2.0830001831054688, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0130331753554502e-07, |
|
"logits/generated": 6.5366621017456055, |
|
"logits/real": 5.0998101234436035, |
|
"logps/generated": -300.20758056640625, |
|
"logps/real": -247.12393188476562, |
|
"loss": 0.0637, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1575188636779785, |
|
"rewards/margins": 5.251214981079102, |
|
"rewards/real": 2.093696117401123, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.0011848341232227e-07, |
|
"logits/generated": 6.446131706237793, |
|
"logits/real": 4.840705871582031, |
|
"logps/generated": -315.06439208984375, |
|
"logps/real": -226.3990478515625, |
|
"loss": 0.0472, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.494022846221924, |
|
"rewards/margins": 5.752326965332031, |
|
"rewards/real": 2.2583041191101074, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9893364928909952e-07, |
|
"logits/generated": 6.619847297668457, |
|
"logits/real": 4.667858600616455, |
|
"logps/generated": -294.6004333496094, |
|
"logps/real": -197.28118896484375, |
|
"loss": 0.0512, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1600637435913086, |
|
"rewards/margins": 5.338671684265137, |
|
"rewards/real": 2.1786084175109863, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9774881516587676e-07, |
|
"logits/generated": 6.53371524810791, |
|
"logits/real": 4.700278282165527, |
|
"logps/generated": -309.0678405761719, |
|
"logps/real": -220.1807861328125, |
|
"loss": 0.0639, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.0327439308166504, |
|
"rewards/margins": 5.228504180908203, |
|
"rewards/real": 2.1957602500915527, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.96563981042654e-07, |
|
"logits/generated": 6.723300933837891, |
|
"logits/real": 4.539021968841553, |
|
"logps/generated": -309.5898132324219, |
|
"logps/real": -205.1199493408203, |
|
"loss": 0.0483, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1201491355895996, |
|
"rewards/margins": 5.280410289764404, |
|
"rewards/real": 2.1602609157562256, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9537914691943128e-07, |
|
"logits/generated": 6.615107536315918, |
|
"logits/real": 4.7945075035095215, |
|
"logps/generated": -340.0155944824219, |
|
"logps/real": -234.44259643554688, |
|
"loss": 0.0439, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.4857048988342285, |
|
"rewards/margins": 5.184080600738525, |
|
"rewards/real": 1.6983757019042969, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9419431279620853e-07, |
|
"logits/generated": 6.457086086273193, |
|
"logits/real": 4.920231819152832, |
|
"logps/generated": -301.90081787109375, |
|
"logps/real": -209.1177978515625, |
|
"loss": 0.0413, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -2.7858481407165527, |
|
"rewards/margins": 4.725141525268555, |
|
"rewards/real": 1.9392932653427124, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9300947867298577e-07, |
|
"logits/generated": 6.525505065917969, |
|
"logits/real": 4.8382768630981445, |
|
"logps/generated": -294.0171813964844, |
|
"logps/real": -215.0476531982422, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3542685508728027, |
|
"rewards/margins": 5.338757514953613, |
|
"rewards/real": 1.9844884872436523, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.9182464454976302e-07, |
|
"logits/generated": 6.592054843902588, |
|
"logits/real": 4.595870494842529, |
|
"logps/generated": -303.69305419921875, |
|
"logps/real": -216.79074096679688, |
|
"loss": 0.044, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -3.3395583629608154, |
|
"rewards/margins": 5.180264949798584, |
|
"rewards/real": 1.8407065868377686, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.906398104265403e-07, |
|
"logits/generated": 6.421343803405762, |
|
"logits/real": 4.30601167678833, |
|
"logps/generated": -316.9730529785156, |
|
"logps/real": -221.79721069335938, |
|
"loss": 0.046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5621120929718018, |
|
"rewards/margins": 5.808313846588135, |
|
"rewards/real": 2.246202230453491, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8945497630331754e-07, |
|
"logits/generated": 6.610709190368652, |
|
"logits/real": 4.592480182647705, |
|
"logps/generated": -305.1477355957031, |
|
"logps/real": -200.81964111328125, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.413442611694336, |
|
"rewards/margins": 5.351014614105225, |
|
"rewards/real": 1.9375722408294678, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8827014218009476e-07, |
|
"logits/generated": 6.592061519622803, |
|
"logits/real": 4.954311370849609, |
|
"logps/generated": -323.15411376953125, |
|
"logps/real": -261.54278564453125, |
|
"loss": 0.0489, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.3372626304626465, |
|
"rewards/margins": 5.153835773468018, |
|
"rewards/real": 1.816572904586792, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8708530805687203e-07, |
|
"logits/generated": 6.648950099945068, |
|
"logits/real": 4.868198871612549, |
|
"logps/generated": -311.92987060546875, |
|
"logps/real": -221.17819213867188, |
|
"loss": 0.05, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4177818298339844, |
|
"rewards/margins": 5.747452259063721, |
|
"rewards/real": 2.3296706676483154, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8590047393364928e-07, |
|
"logits/generated": 6.51468563079834, |
|
"logits/real": 4.445683479309082, |
|
"logps/generated": -308.3443908691406, |
|
"logps/real": -217.1921844482422, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.044935941696167, |
|
"rewards/margins": 5.253106594085693, |
|
"rewards/real": 2.2081711292266846, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8471563981042655e-07, |
|
"logits/generated": 6.549835205078125, |
|
"logits/real": 4.801936149597168, |
|
"logps/generated": -295.6900939941406, |
|
"logps/real": -213.8479766845703, |
|
"loss": 0.0543, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1135101318359375, |
|
"rewards/margins": 5.0333404541015625, |
|
"rewards/real": 1.9198299646377563, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.8353080568720377e-07, |
|
"logits/generated": 6.434506416320801, |
|
"logits/real": 4.751941204071045, |
|
"logps/generated": -318.86431884765625, |
|
"logps/real": -244.60107421875, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4822006225585938, |
|
"rewards/margins": 5.774388313293457, |
|
"rewards/real": 2.2921876907348633, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8234597156398104e-07, |
|
"logits/generated": 6.266590118408203, |
|
"logits/real": 4.948928356170654, |
|
"logps/generated": -310.56341552734375, |
|
"logps/real": -226.72293090820312, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.347864866256714, |
|
"rewards/margins": 5.35115909576416, |
|
"rewards/real": 2.003293991088867, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.811611374407583e-07, |
|
"logits/generated": 6.356667518615723, |
|
"logits/real": 4.884829521179199, |
|
"logps/generated": -313.6650390625, |
|
"logps/real": -234.5040283203125, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3364055156707764, |
|
"rewards/margins": 5.497099876403809, |
|
"rewards/real": 2.160693645477295, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7997630331753554e-07, |
|
"logits/generated": 6.548737525939941, |
|
"logits/real": 4.825071334838867, |
|
"logps/generated": -291.5633850097656, |
|
"logps/real": -233.65988159179688, |
|
"loss": 0.0505, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.0673575401306152, |
|
"rewards/margins": 5.524203300476074, |
|
"rewards/real": 2.456845760345459, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7879146919431278e-07, |
|
"logits/generated": 6.497158050537109, |
|
"logits/real": 4.860651969909668, |
|
"logps/generated": -316.8197937011719, |
|
"logps/real": -223.147705078125, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.715630292892456, |
|
"rewards/margins": 5.779938697814941, |
|
"rewards/real": 2.064307689666748, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7760663507109003e-07, |
|
"logits/generated": 6.6343674659729, |
|
"logits/real": 4.742542266845703, |
|
"logps/generated": -314.57635498046875, |
|
"logps/real": -218.7213897705078, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.649468183517456, |
|
"rewards/margins": 5.897641181945801, |
|
"rewards/real": 2.248173713684082, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.764218009478673e-07, |
|
"logits/generated": 6.572301387786865, |
|
"logits/real": 4.605647087097168, |
|
"logps/generated": -312.57159423828125, |
|
"logps/real": -218.27255249023438, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.4011642932891846, |
|
"rewards/margins": 5.888424396514893, |
|
"rewards/real": 2.487260341644287, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.7523696682464452e-07, |
|
"logits/generated": 6.526484489440918, |
|
"logits/real": 4.609063625335693, |
|
"logps/generated": -319.1774597167969, |
|
"logps/real": -225.0438232421875, |
|
"loss": 0.0432, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5665206909179688, |
|
"rewards/margins": 5.571340084075928, |
|
"rewards/real": 2.004819393157959, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.740521327014218e-07, |
|
"logits/generated": 6.531280517578125, |
|
"logits/real": 4.885863780975342, |
|
"logps/generated": -315.1875915527344, |
|
"logps/real": -237.132568359375, |
|
"loss": 0.0469, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.1818227767944336, |
|
"rewards/margins": 5.084862232208252, |
|
"rewards/real": 1.9030392169952393, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7286729857819904e-07, |
|
"logits/generated": 6.615921974182129, |
|
"logits/real": 4.542719841003418, |
|
"logps/generated": -314.04119873046875, |
|
"logps/real": -225.2353515625, |
|
"loss": 0.0352, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.191343069076538, |
|
"rewards/margins": 5.438819408416748, |
|
"rewards/real": 2.247476816177368, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7168246445497631e-07, |
|
"logits/generated": 6.290173530578613, |
|
"logits/real": 4.586104869842529, |
|
"logps/generated": -313.9992370605469, |
|
"logps/real": -213.3672332763672, |
|
"loss": 0.0379, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7760837078094482, |
|
"rewards/margins": 6.122511863708496, |
|
"rewards/real": 2.3464274406433105, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7049763033175353e-07, |
|
"logits/generated": 6.58236837387085, |
|
"logits/real": 4.376986503601074, |
|
"logps/generated": -296.7496643066406, |
|
"logps/real": -215.1361083984375, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -3.280973434448242, |
|
"rewards/margins": 5.545414924621582, |
|
"rewards/real": 2.26444149017334, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.693127962085308e-07, |
|
"logits/generated": 6.613530158996582, |
|
"logits/real": 4.965182304382324, |
|
"logps/generated": -320.67425537109375, |
|
"logps/real": -241.35256958007812, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3830809593200684, |
|
"rewards/margins": 5.788832664489746, |
|
"rewards/real": 2.4057505130767822, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.6812796208530805e-07, |
|
"logits/generated": 6.390702724456787, |
|
"logits/real": 4.728426933288574, |
|
"logps/generated": -312.08929443359375, |
|
"logps/real": -227.0480499267578, |
|
"loss": 0.038, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.2798690795898438, |
|
"rewards/margins": 5.261677265167236, |
|
"rewards/real": 1.9818084239959717, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.669431279620853e-07, |
|
"logits/generated": 6.487574100494385, |
|
"logits/real": 4.512204647064209, |
|
"logps/generated": -321.84368896484375, |
|
"logps/real": -240.1066131591797, |
|
"loss": 0.0496, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5348918437957764, |
|
"rewards/margins": 5.897745132446289, |
|
"rewards/real": 2.3628532886505127, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6575829383886255e-07, |
|
"logits/generated": 6.475184440612793, |
|
"logits/real": 4.735175132751465, |
|
"logps/generated": -296.4978942871094, |
|
"logps/real": -225.4010467529297, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1431491374969482, |
|
"rewards/margins": 5.334324359893799, |
|
"rewards/real": 2.1911754608154297, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.645734597156398e-07, |
|
"logits/generated": 6.46063756942749, |
|
"logits/real": 4.446132183074951, |
|
"logps/generated": -306.07568359375, |
|
"logps/real": -224.0647735595703, |
|
"loss": 0.0525, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.518324613571167, |
|
"rewards/margins": 5.946985721588135, |
|
"rewards/real": 2.4286608695983887, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6338862559241706e-07, |
|
"logits/generated": 6.575872898101807, |
|
"logits/real": 4.777173042297363, |
|
"logps/generated": -312.64984130859375, |
|
"logps/real": -225.09823608398438, |
|
"loss": 0.0327, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.520355701446533, |
|
"rewards/margins": 5.975916862487793, |
|
"rewards/real": 2.4555611610412598, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.622037914691943e-07, |
|
"logits/generated": 6.580047607421875, |
|
"logits/real": 4.5617146492004395, |
|
"logps/generated": -303.8891296386719, |
|
"logps/real": -218.8231658935547, |
|
"loss": 0.0451, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.342977523803711, |
|
"rewards/margins": 5.351634502410889, |
|
"rewards/real": 2.0086567401885986, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6101895734597156e-07, |
|
"logits/generated": 6.562408447265625, |
|
"logits/real": 5.205977916717529, |
|
"logps/generated": -321.8945617675781, |
|
"logps/real": -269.1778564453125, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.6907927989959717, |
|
"rewards/margins": 5.766333103179932, |
|
"rewards/real": 2.07554030418396, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.598341232227488e-07, |
|
"logits/generated": 6.638333320617676, |
|
"logits/real": 4.657790184020996, |
|
"logps/generated": -319.16754150390625, |
|
"logps/real": -232.8119354248047, |
|
"loss": 0.0498, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.34710693359375, |
|
"rewards/margins": 5.686694145202637, |
|
"rewards/real": 2.3395867347717285, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5864928909952605e-07, |
|
"logits/generated": 6.637696743011475, |
|
"logits/real": 4.663809776306152, |
|
"logps/generated": -317.9059143066406, |
|
"logps/real": -223.9643096923828, |
|
"loss": 0.0544, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5467820167541504, |
|
"rewards/margins": 5.613127708435059, |
|
"rewards/real": 2.066345691680908, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5746445497630332e-07, |
|
"logits/generated": 6.5797553062438965, |
|
"logits/real": 5.032973289489746, |
|
"logps/generated": -309.12310791015625, |
|
"logps/real": -231.84146118164062, |
|
"loss": 0.0434, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.374034881591797, |
|
"rewards/margins": 5.373286247253418, |
|
"rewards/real": 1.9992516040802002, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5627962085308054e-07, |
|
"logits/generated": 6.6527228355407715, |
|
"logits/real": 4.813700199127197, |
|
"logps/generated": -314.54132080078125, |
|
"logps/real": -244.5780487060547, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4785728454589844, |
|
"rewards/margins": 5.757080078125, |
|
"rewards/real": 2.2785069942474365, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5509478672985782e-07, |
|
"logits/generated": 6.394598007202148, |
|
"logits/real": 4.875939846038818, |
|
"logps/generated": -316.898681640625, |
|
"logps/real": -229.66049194335938, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3502418994903564, |
|
"rewards/margins": 5.546257972717285, |
|
"rewards/real": 2.196016311645508, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5390995260663506e-07, |
|
"logits/generated": 6.59166955947876, |
|
"logits/real": 4.965734004974365, |
|
"logps/generated": -299.8507385253906, |
|
"logps/real": -240.0013885498047, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.1730546951293945, |
|
"rewards/margins": 5.254295349121094, |
|
"rewards/real": 2.081240177154541, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5272511848341233e-07, |
|
"logits/generated": 6.579958915710449, |
|
"logits/real": 4.6075544357299805, |
|
"logps/generated": -311.3309631347656, |
|
"logps/real": -231.1168975830078, |
|
"loss": 0.0377, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.3581135272979736, |
|
"rewards/margins": 5.885567665100098, |
|
"rewards/real": 2.5274531841278076, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5154028436018955e-07, |
|
"logits/generated": 6.564838409423828, |
|
"logits/real": 4.323554515838623, |
|
"logps/generated": -314.01080322265625, |
|
"logps/real": -216.9060821533203, |
|
"loss": 0.054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6513094902038574, |
|
"rewards/margins": 6.1155195236206055, |
|
"rewards/real": 2.464210033416748, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5035545023696683e-07, |
|
"logits/generated": 6.470817565917969, |
|
"logits/real": 4.969902038574219, |
|
"logps/generated": -315.4042663574219, |
|
"logps/real": -210.75894165039062, |
|
"loss": 0.0383, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6396255493164062, |
|
"rewards/margins": 5.780555248260498, |
|
"rewards/real": 2.140929698944092, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.4917061611374407e-07, |
|
"logits/generated": 6.28502082824707, |
|
"logits/real": 4.579121112823486, |
|
"logps/generated": -298.55078125, |
|
"logps/real": -227.2353057861328, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.269718647003174, |
|
"rewards/margins": 5.49901819229126, |
|
"rewards/real": 2.2292990684509277, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4798578199052132e-07, |
|
"logits/generated": 6.529310703277588, |
|
"logits/real": 5.1802496910095215, |
|
"logps/generated": -314.0540466308594, |
|
"logps/real": -246.96493530273438, |
|
"loss": 0.0459, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.632396697998047, |
|
"rewards/margins": 5.8060173988342285, |
|
"rewards/real": 2.173621892929077, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4680094786729857e-07, |
|
"logits/generated": 6.60665225982666, |
|
"logits/real": 4.755316257476807, |
|
"logps/generated": -315.1619873046875, |
|
"logps/real": -212.05435180664062, |
|
"loss": 0.0569, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5212502479553223, |
|
"rewards/margins": 5.648011207580566, |
|
"rewards/real": 2.126760959625244, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.456161137440758e-07, |
|
"logits/generated": 6.501960754394531, |
|
"logits/real": 4.503037929534912, |
|
"logps/generated": -330.319580078125, |
|
"logps/real": -211.60977172851562, |
|
"loss": 0.0477, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.714958906173706, |
|
"rewards/margins": 5.880065441131592, |
|
"rewards/real": 2.1651062965393066, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4443127962085309e-07, |
|
"logits/generated": 6.524314880371094, |
|
"logits/real": 4.682694911956787, |
|
"logps/generated": -318.777099609375, |
|
"logps/real": -249.3422088623047, |
|
"loss": 0.0234, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.846958875656128, |
|
"rewards/margins": 5.798233985900879, |
|
"rewards/real": 1.9512755870819092, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4324644549763033e-07, |
|
"logits/generated": 6.539910793304443, |
|
"logits/real": 4.809814929962158, |
|
"logps/generated": -323.9732360839844, |
|
"logps/real": -247.67227172851562, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5829575061798096, |
|
"rewards/margins": 5.772634506225586, |
|
"rewards/real": 2.1896770000457764, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4206161137440758e-07, |
|
"logits/generated": 6.5540771484375, |
|
"logits/real": 4.512951850891113, |
|
"logps/generated": -315.17010498046875, |
|
"logps/real": -210.12588500976562, |
|
"loss": 0.0278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6037814617156982, |
|
"rewards/margins": 5.973625183105469, |
|
"rewards/real": 2.3698432445526123, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4087677725118482e-07, |
|
"logits/generated": 6.513745307922363, |
|
"logits/real": 4.433924198150635, |
|
"logps/generated": -310.23089599609375, |
|
"logps/real": -203.8213348388672, |
|
"loss": 0.036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4215285778045654, |
|
"rewards/margins": 5.807127952575684, |
|
"rewards/real": 2.385599374771118, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.396919431279621e-07, |
|
"logits/generated": 6.5156755447387695, |
|
"logits/real": 4.558250427246094, |
|
"logps/generated": -299.29339599609375, |
|
"logps/real": -218.6123046875, |
|
"loss": 0.035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.3458030223846436, |
|
"rewards/margins": 5.673041820526123, |
|
"rewards/real": 2.3272387981414795, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3850710900473934e-07, |
|
"logits/generated": 6.494305610656738, |
|
"logits/real": 5.065830707550049, |
|
"logps/generated": -316.78814697265625, |
|
"logps/real": -231.40750122070312, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5985469818115234, |
|
"rewards/margins": 5.727441310882568, |
|
"rewards/real": 2.128894805908203, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3732227488151656e-07, |
|
"logits/generated": 6.41510009765625, |
|
"logits/real": 4.9217329025268555, |
|
"logps/generated": -318.82440185546875, |
|
"logps/real": -246.50369262695312, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5909526348114014, |
|
"rewards/margins": 6.108970642089844, |
|
"rewards/real": 2.5180177688598633, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3613744075829384e-07, |
|
"logits/generated": 6.501974582672119, |
|
"logits/real": 4.702073097229004, |
|
"logps/generated": -306.13214111328125, |
|
"logps/real": -238.8610382080078, |
|
"loss": 0.0517, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.403776168823242, |
|
"rewards/margins": 5.4921417236328125, |
|
"rewards/real": 2.0883657932281494, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3495260663507108e-07, |
|
"logits/generated": 6.479678153991699, |
|
"logits/real": 5.00525426864624, |
|
"logps/generated": -333.7792663574219, |
|
"logps/real": -235.4060516357422, |
|
"loss": 0.0294, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.638486385345459, |
|
"rewards/margins": 5.711910247802734, |
|
"rewards/real": 2.0734241008758545, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3376777251184836e-07, |
|
"logits/generated": 6.646874904632568, |
|
"logits/real": 4.480135917663574, |
|
"logps/generated": -319.85870361328125, |
|
"logps/real": -205.182861328125, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5380020141601562, |
|
"rewards/margins": 5.962095737457275, |
|
"rewards/real": 2.424093723297119, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3258293838862558e-07, |
|
"logits/generated": 6.473311424255371, |
|
"logits/real": 4.572601795196533, |
|
"logps/generated": -313.9909973144531, |
|
"logps/real": -238.57861328125, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.488398790359497, |
|
"rewards/margins": 5.876242637634277, |
|
"rewards/real": 2.3878438472747803, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3139810426540285e-07, |
|
"logits/generated": 6.481254577636719, |
|
"logits/real": 4.894389629364014, |
|
"logps/generated": -322.4656677246094, |
|
"logps/real": -231.29037475585938, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.569429874420166, |
|
"rewards/margins": 5.297967910766602, |
|
"rewards/real": 1.7285382747650146, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.302132701421801e-07, |
|
"logits/generated": 6.44034481048584, |
|
"logits/real": 4.870292663574219, |
|
"logps/generated": -309.8015441894531, |
|
"logps/real": -232.23141479492188, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.57190203666687, |
|
"rewards/margins": 5.714931488037109, |
|
"rewards/real": 2.1430296897888184, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.2902843601895734e-07, |
|
"logits/generated": 6.628100395202637, |
|
"logits/real": 4.56320333480835, |
|
"logps/generated": -314.26617431640625, |
|
"logps/real": -216.93435668945312, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.669429302215576, |
|
"rewards/margins": 5.688117980957031, |
|
"rewards/real": 2.018688201904297, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.278436018957346e-07, |
|
"logits/generated": 6.5281829833984375, |
|
"logits/real": 4.858192443847656, |
|
"logps/generated": -314.5399169921875, |
|
"logps/real": -231.82437133789062, |
|
"loss": 0.0529, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.659435987472534, |
|
"rewards/margins": 6.2276201248168945, |
|
"rewards/real": 2.5681843757629395, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2665876777251183e-07, |
|
"logits/generated": 6.622697353363037, |
|
"logits/real": 4.539548397064209, |
|
"logps/generated": -316.33612060546875, |
|
"logps/real": -218.0032196044922, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.372077465057373, |
|
"rewards/margins": 5.659914493560791, |
|
"rewards/real": 2.287837505340576, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.254739336492891e-07, |
|
"logits/generated": 6.434654235839844, |
|
"logits/real": 5.143196105957031, |
|
"logps/generated": -318.66705322265625, |
|
"logps/real": -229.20541381835938, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7129311561584473, |
|
"rewards/margins": 5.720685958862305, |
|
"rewards/real": 2.007754325866699, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2428909952606635e-07, |
|
"logits/generated": 6.507617950439453, |
|
"logits/real": 4.2293171882629395, |
|
"logps/generated": -331.07684326171875, |
|
"logps/real": -223.13516235351562, |
|
"loss": 0.0279, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.012661933898926, |
|
"rewards/margins": 6.401325225830078, |
|
"rewards/real": 2.3886632919311523, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.231042654028436e-07, |
|
"logits/generated": 6.525460243225098, |
|
"logits/real": 4.729413986206055, |
|
"logps/generated": -307.34771728515625, |
|
"logps/real": -223.11129760742188, |
|
"loss": 0.0319, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.219517230987549, |
|
"rewards/margins": 5.384081840515137, |
|
"rewards/real": 2.164564609527588, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2191943127962085e-07, |
|
"logits/generated": 6.551595211029053, |
|
"logits/real": 4.6989874839782715, |
|
"logps/generated": -298.13812255859375, |
|
"logps/real": -219.3531036376953, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5793490409851074, |
|
"rewards/margins": 5.872319221496582, |
|
"rewards/real": 2.292970657348633, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.207345971563981e-07, |
|
"logits/generated": 6.519845485687256, |
|
"logits/real": 4.6157450675964355, |
|
"logps/generated": -313.74615478515625, |
|
"logps/real": -225.33602905273438, |
|
"loss": 0.044, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.348008394241333, |
|
"rewards/margins": 5.656905651092529, |
|
"rewards/real": 2.3088979721069336, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1954976303317534e-07, |
|
"logits/generated": 6.551909446716309, |
|
"logits/real": 4.742045879364014, |
|
"logps/generated": -308.15435791015625, |
|
"logps/real": -239.033935546875, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4503626823425293, |
|
"rewards/margins": 5.495227813720703, |
|
"rewards/real": 2.0448646545410156, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.183649289099526e-07, |
|
"logits/generated": 6.58068323135376, |
|
"logits/real": 4.629731178283691, |
|
"logps/generated": -316.3744812011719, |
|
"logps/real": -217.2517547607422, |
|
"loss": 0.0322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.685664415359497, |
|
"rewards/margins": 6.379915714263916, |
|
"rewards/real": 2.6942508220672607, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1718009478672986e-07, |
|
"logits/generated": 6.6019697189331055, |
|
"logits/real": 4.920955657958984, |
|
"logps/generated": -315.7640380859375, |
|
"logps/real": -215.07107543945312, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7241485118865967, |
|
"rewards/margins": 5.824596405029297, |
|
"rewards/real": 2.1004483699798584, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.159952606635071e-07, |
|
"logits/generated": 6.5531206130981445, |
|
"logits/real": 4.487125396728516, |
|
"logps/generated": -308.80859375, |
|
"logps/real": -211.96389770507812, |
|
"loss": 0.0252, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.3599178791046143, |
|
"rewards/margins": 6.051535606384277, |
|
"rewards/real": 2.6916182041168213, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1481042654028436e-07, |
|
"logits/generated": 6.397873878479004, |
|
"logits/real": 4.594054698944092, |
|
"logps/generated": -307.98046875, |
|
"logps/real": -209.40310668945312, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.6652374267578125, |
|
"rewards/margins": 6.013858795166016, |
|
"rewards/real": 2.348621129989624, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.136255924170616e-07, |
|
"logits/generated": 6.375195026397705, |
|
"logits/real": 4.987454414367676, |
|
"logps/generated": -317.755859375, |
|
"logps/real": -241.71255493164062, |
|
"loss": 0.0497, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.5185465812683105, |
|
"rewards/margins": 5.797640800476074, |
|
"rewards/real": 2.2790942192077637, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1244075829383886e-07, |
|
"logits/generated": 6.541880130767822, |
|
"logits/real": 4.569333553314209, |
|
"logps/generated": -318.06109619140625, |
|
"logps/real": -230.28421020507812, |
|
"loss": 0.0618, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.4923720359802246, |
|
"rewards/margins": 5.879878044128418, |
|
"rewards/real": 2.3875060081481934, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.112559241706161e-07, |
|
"logits/generated": 6.615804195404053, |
|
"logits/real": 4.598508358001709, |
|
"logps/generated": -315.1165771484375, |
|
"logps/real": -230.8600616455078, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.722174882888794, |
|
"rewards/margins": 6.234826564788818, |
|
"rewards/real": 2.512650966644287, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.1007109004739336e-07, |
|
"logits/generated": 6.621249198913574, |
|
"logits/real": 4.556908130645752, |
|
"logps/generated": -320.9785461425781, |
|
"logps/real": -225.3008575439453, |
|
"loss": 0.0337, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7469630241394043, |
|
"rewards/margins": 6.285904884338379, |
|
"rewards/real": 2.5389418601989746, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0888625592417061e-07, |
|
"logits/generated": 6.390491962432861, |
|
"logits/real": 4.773979187011719, |
|
"logps/generated": -312.04071044921875, |
|
"logps/real": -249.9011688232422, |
|
"loss": 0.041, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3887813091278076, |
|
"rewards/margins": 5.657594203948975, |
|
"rewards/real": 2.268812894821167, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0770142180094787e-07, |
|
"logits/generated": 6.476538181304932, |
|
"logits/real": 4.669577598571777, |
|
"logps/generated": -312.90032958984375, |
|
"logps/real": -213.92166137695312, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.6199536323547363, |
|
"rewards/margins": 5.9859161376953125, |
|
"rewards/real": 2.365962505340576, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0651658767772511e-07, |
|
"logits/generated": 6.476927757263184, |
|
"logits/real": 4.563714504241943, |
|
"logps/generated": -315.058837890625, |
|
"logps/real": -224.32577514648438, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7334506511688232, |
|
"rewards/margins": 6.3079400062561035, |
|
"rewards/real": 2.5744881629943848, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0533175355450237e-07, |
|
"logits/generated": 6.539498329162598, |
|
"logits/real": 4.8311848640441895, |
|
"logps/generated": -318.9219970703125, |
|
"logps/real": -240.50894165039062, |
|
"loss": 0.0457, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6386666297912598, |
|
"rewards/margins": 6.052326679229736, |
|
"rewards/real": 2.4136602878570557, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0414691943127962e-07, |
|
"logits/generated": 6.435731410980225, |
|
"logits/real": 4.929614067077637, |
|
"logps/generated": -316.7015686035156, |
|
"logps/real": -229.21322631835938, |
|
"loss": 0.043, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.8294575214385986, |
|
"rewards/margins": 6.2790374755859375, |
|
"rewards/real": 2.4495797157287598, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0296208530805687e-07, |
|
"logits/generated": 6.627385139465332, |
|
"logits/real": 4.485804557800293, |
|
"logps/generated": -309.6326599121094, |
|
"logps/real": -211.5670166015625, |
|
"loss": 0.045, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -3.6681857109069824, |
|
"rewards/margins": 6.135626316070557, |
|
"rewards/real": 2.467440128326416, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0177725118483411e-07, |
|
"logits/generated": 6.6564741134643555, |
|
"logits/real": 4.68516206741333, |
|
"logps/generated": -318.8467712402344, |
|
"logps/real": -226.6106414794922, |
|
"loss": 0.051, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5700950622558594, |
|
"rewards/margins": 5.723981857299805, |
|
"rewards/real": 2.153886556625366, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0059241706161137e-07, |
|
"logits/generated": 6.6741204261779785, |
|
"logits/real": 4.536016941070557, |
|
"logps/generated": -324.20159912109375, |
|
"logps/real": -207.59896850585938, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.788970470428467, |
|
"rewards/margins": 6.435559272766113, |
|
"rewards/real": 2.6465885639190674, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.940758293838862e-08, |
|
"logits/generated": 6.565242767333984, |
|
"logits/real": 4.836869239807129, |
|
"logps/generated": -303.9171447753906, |
|
"logps/real": -200.09014892578125, |
|
"loss": 0.0259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.631394147872925, |
|
"rewards/margins": 5.958956718444824, |
|
"rewards/real": 2.327561855316162, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.822274881516588e-08, |
|
"logits/generated": 6.608918190002441, |
|
"logits/real": 4.765702724456787, |
|
"logps/generated": -311.97247314453125, |
|
"logps/real": -218.27175903320312, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.8027045726776123, |
|
"rewards/margins": 6.115901470184326, |
|
"rewards/real": 2.3131978511810303, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.703791469194312e-08, |
|
"logits/generated": 6.5209455490112305, |
|
"logits/real": 4.784424781799316, |
|
"logps/generated": -316.18646240234375, |
|
"logps/real": -219.3183135986328, |
|
"loss": 0.0465, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.898909330368042, |
|
"rewards/margins": 5.930804252624512, |
|
"rewards/real": 2.0318946838378906, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.585308056872038e-08, |
|
"logits/generated": 6.519847869873047, |
|
"logits/real": 4.682705879211426, |
|
"logps/generated": -316.4295349121094, |
|
"logps/real": -221.3084259033203, |
|
"loss": 0.022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6345527172088623, |
|
"rewards/margins": 5.830044746398926, |
|
"rewards/real": 2.1954920291900635, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.466824644549763e-08, |
|
"logits/generated": 6.437973976135254, |
|
"logits/real": 4.786694526672363, |
|
"logps/generated": -308.895263671875, |
|
"logps/real": -247.1173095703125, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.685072422027588, |
|
"rewards/margins": 5.676372051239014, |
|
"rewards/real": 1.9912999868392944, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.348341232227488e-08, |
|
"logits/generated": 6.578193664550781, |
|
"logits/real": 5.0969648361206055, |
|
"logps/generated": -321.40362548828125, |
|
"logps/real": -244.7572479248047, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6856868267059326, |
|
"rewards/margins": 5.648140907287598, |
|
"rewards/real": 1.9624547958374023, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.229857819905212e-08, |
|
"logits/generated": 6.4852423667907715, |
|
"logits/real": 4.911639213562012, |
|
"logps/generated": -313.97930908203125, |
|
"logps/real": -257.989013671875, |
|
"loss": 0.0382, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.478628635406494, |
|
"rewards/margins": 5.914477348327637, |
|
"rewards/real": 2.435849666595459, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.111374407582938e-08, |
|
"logits/generated": 6.461939811706543, |
|
"logits/real": 4.53496789932251, |
|
"logps/generated": -324.73724365234375, |
|
"logps/real": -236.884521484375, |
|
"loss": 0.0395, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.916443347930908, |
|
"rewards/margins": 5.9058918952941895, |
|
"rewards/real": 1.9894483089447021, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.992890995260663e-08, |
|
"logits/generated": 6.4986162185668945, |
|
"logits/real": 4.736280918121338, |
|
"logps/generated": -315.95770263671875, |
|
"logps/real": -232.65249633789062, |
|
"loss": 0.0334, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5177340507507324, |
|
"rewards/margins": 5.86563777923584, |
|
"rewards/real": 2.3479039669036865, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.874407582938389e-08, |
|
"logits/generated": 6.594623565673828, |
|
"logits/real": 4.596661567687988, |
|
"logps/generated": -294.2056579589844, |
|
"logps/real": -208.23135375976562, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.553370714187622, |
|
"rewards/margins": 5.874009609222412, |
|
"rewards/real": 2.3206381797790527, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.755924170616114e-08, |
|
"logits/generated": 6.560601234436035, |
|
"logits/real": 4.890211582183838, |
|
"logps/generated": -320.5434875488281, |
|
"logps/real": -234.8424835205078, |
|
"loss": 0.0322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.717367649078369, |
|
"rewards/margins": 5.8168792724609375, |
|
"rewards/real": 2.0995113849639893, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.63744075829384e-08, |
|
"logits/generated": 6.643453121185303, |
|
"logits/real": 4.418960094451904, |
|
"logps/generated": -308.0656433105469, |
|
"logps/real": -188.0706787109375, |
|
"loss": 0.0344, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6177737712860107, |
|
"rewards/margins": 6.433516502380371, |
|
"rewards/real": 2.8157434463500977, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.518957345971564e-08, |
|
"logits/generated": 6.609760284423828, |
|
"logits/real": 4.693791389465332, |
|
"logps/generated": -311.10589599609375, |
|
"logps/real": -222.8950653076172, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7364165782928467, |
|
"rewards/margins": 6.025433540344238, |
|
"rewards/real": 2.2890164852142334, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.40047393364929e-08, |
|
"logits/generated": 6.644488334655762, |
|
"logits/real": 4.728980541229248, |
|
"logps/generated": -326.75518798828125, |
|
"logps/real": -216.3810577392578, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6819024085998535, |
|
"rewards/margins": 6.162990570068359, |
|
"rewards/real": 2.4810874462127686, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.281990521327013e-08, |
|
"logits/generated": 6.576291561126709, |
|
"logits/real": 4.637971878051758, |
|
"logps/generated": -309.2288513183594, |
|
"logps/real": -236.91561889648438, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5884525775909424, |
|
"rewards/margins": 5.920731544494629, |
|
"rewards/real": 2.3322794437408447, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.163507109004738e-08, |
|
"logits/generated": 6.310732841491699, |
|
"logits/real": 4.888187885284424, |
|
"logps/generated": -311.5484924316406, |
|
"logps/real": -224.40628051757812, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.938901901245117, |
|
"rewards/margins": 6.3101019859313965, |
|
"rewards/real": 2.3712000846862793, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.045023696682464e-08, |
|
"logits/generated": 6.325362205505371, |
|
"logits/real": 5.112117767333984, |
|
"logps/generated": -311.65325927734375, |
|
"logps/real": -254.33285522460938, |
|
"loss": 0.0511, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.4007866382598877, |
|
"rewards/margins": 5.50778865814209, |
|
"rewards/real": 2.1070024967193604, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.926540284360189e-08, |
|
"logits/generated": 6.5289106369018555, |
|
"logits/real": 4.755041599273682, |
|
"logps/generated": -333.82012939453125, |
|
"logps/real": -237.4073486328125, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.6312661170959473, |
|
"rewards/margins": 6.053757667541504, |
|
"rewards/real": 2.4224915504455566, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.808056872037915e-08, |
|
"logits/generated": 6.4816789627075195, |
|
"logits/real": 4.328751087188721, |
|
"logps/generated": -322.64190673828125, |
|
"logps/real": -216.17178344726562, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.9149162769317627, |
|
"rewards/margins": 6.450268745422363, |
|
"rewards/real": 2.5353522300720215, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.689573459715639e-08, |
|
"logits/generated": 6.6160569190979, |
|
"logits/real": 4.253937244415283, |
|
"logps/generated": -321.759765625, |
|
"logps/real": -202.4170379638672, |
|
"loss": 0.0239, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.693296432495117, |
|
"rewards/margins": 5.80830192565918, |
|
"rewards/real": 2.1150054931640625, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.571090047393365e-08, |
|
"logits/generated": 6.380744934082031, |
|
"logits/real": 4.77320671081543, |
|
"logps/generated": -306.7076110839844, |
|
"logps/real": -220.3052215576172, |
|
"loss": 0.0338, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4694228172302246, |
|
"rewards/margins": 5.889524936676025, |
|
"rewards/real": 2.42010235786438, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.45260663507109e-08, |
|
"logits/generated": 6.456230163574219, |
|
"logits/real": 4.660184860229492, |
|
"logps/generated": -309.7335510253906, |
|
"logps/real": -223.43399047851562, |
|
"loss": 0.0355, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.4624855518341064, |
|
"rewards/margins": 5.547595500946045, |
|
"rewards/real": 2.0851101875305176, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.334123222748814e-08, |
|
"logits/generated": 6.5137529373168945, |
|
"logits/real": 4.8226494789123535, |
|
"logps/generated": -320.96771240234375, |
|
"logps/real": -244.80807495117188, |
|
"loss": 0.0315, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9199230670928955, |
|
"rewards/margins": 6.18032693862915, |
|
"rewards/real": 2.2604031562805176, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.215639810426539e-08, |
|
"logits/generated": 6.472892761230469, |
|
"logits/real": 4.793478012084961, |
|
"logps/generated": -321.29473876953125, |
|
"logps/real": -232.20382690429688, |
|
"loss": 0.0445, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.531625270843506, |
|
"rewards/margins": 6.006019592285156, |
|
"rewards/real": 2.4743950366973877, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.097156398104265e-08, |
|
"logits/generated": 6.526330471038818, |
|
"logits/real": 4.876141548156738, |
|
"logps/generated": -308.0903625488281, |
|
"logps/real": -241.17123413085938, |
|
"loss": 0.0497, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.8017773628234863, |
|
"rewards/margins": 6.233469486236572, |
|
"rewards/real": 2.431692123413086, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.97867298578199e-08, |
|
"logits/generated": 6.507188320159912, |
|
"logits/real": 4.934351444244385, |
|
"logps/generated": -313.9234619140625, |
|
"logps/real": -229.16958618164062, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.8360989093780518, |
|
"rewards/margins": 5.914695739746094, |
|
"rewards/real": 2.0785973072052, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.860189573459716e-08, |
|
"logits/generated": 6.5582594871521, |
|
"logits/real": 4.455166339874268, |
|
"logps/generated": -316.6048583984375, |
|
"logps/real": -216.8268280029297, |
|
"loss": 0.0314, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.8321373462677, |
|
"rewards/margins": 6.344393730163574, |
|
"rewards/real": 2.512256383895874, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.74170616113744e-08, |
|
"logits/generated": 6.617920875549316, |
|
"logits/real": 4.433808326721191, |
|
"logps/generated": -310.179931640625, |
|
"logps/real": -225.44900512695312, |
|
"loss": 0.0409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6572768688201904, |
|
"rewards/margins": 6.027648448944092, |
|
"rewards/real": 2.3703715801239014, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.623222748815166e-08, |
|
"logits/generated": 6.532803535461426, |
|
"logits/real": 4.4606032371521, |
|
"logps/generated": -305.09393310546875, |
|
"logps/real": -212.5522918701172, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.38942289352417, |
|
"rewards/margins": 5.628909587860107, |
|
"rewards/real": 2.2394864559173584, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.504739336492891e-08, |
|
"logits/generated": 6.532896995544434, |
|
"logits/real": 4.811502456665039, |
|
"logps/generated": -321.75567626953125, |
|
"logps/real": -236.20590209960938, |
|
"loss": 0.0334, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.9196619987487793, |
|
"rewards/margins": 6.389462471008301, |
|
"rewards/real": 2.4698009490966797, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.386255924170615e-08, |
|
"logits/generated": 6.573578834533691, |
|
"logits/real": 4.610198497772217, |
|
"logps/generated": -330.74285888671875, |
|
"logps/real": -208.0010986328125, |
|
"loss": 0.0436, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.901912212371826, |
|
"rewards/margins": 6.33354377746582, |
|
"rewards/real": 2.431631326675415, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.26777251184834e-08, |
|
"logits/generated": 6.6127519607543945, |
|
"logits/real": 4.326380252838135, |
|
"logps/generated": -313.0176696777344, |
|
"logps/real": -204.0683135986328, |
|
"loss": 0.0283, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7593460083007812, |
|
"rewards/margins": 6.059396266937256, |
|
"rewards/real": 2.3000504970550537, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.149289099526066e-08, |
|
"logits/generated": 6.535134792327881, |
|
"logits/real": 4.897824287414551, |
|
"logps/generated": -308.34429931640625, |
|
"logps/real": -220.6827392578125, |
|
"loss": 0.0532, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -3.668942928314209, |
|
"rewards/margins": 5.9941911697387695, |
|
"rewards/real": 2.325247287750244, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.030805687203791e-08, |
|
"logits/generated": 6.5091376304626465, |
|
"logits/real": 4.820844650268555, |
|
"logps/generated": -324.505859375, |
|
"logps/real": -224.4438018798828, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.988135814666748, |
|
"rewards/margins": 6.7851080894470215, |
|
"rewards/real": 2.7969725131988525, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.912322274881516e-08, |
|
"logits/generated": 6.506842136383057, |
|
"logits/real": 4.628279685974121, |
|
"logps/generated": -311.8347473144531, |
|
"logps/real": -239.9702606201172, |
|
"loss": 0.0313, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3794751167297363, |
|
"rewards/margins": 5.759786605834961, |
|
"rewards/real": 2.380312204360962, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.793838862559241e-08, |
|
"logits/generated": 6.515402793884277, |
|
"logits/real": 4.95352840423584, |
|
"logps/generated": -318.76495361328125, |
|
"logps/real": -234.7864227294922, |
|
"loss": 0.0425, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.8171284198760986, |
|
"rewards/margins": 6.229681968688965, |
|
"rewards/real": 2.412553071975708, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.6753554502369666e-08, |
|
"logits/generated": 6.587221622467041, |
|
"logits/real": 4.416947364807129, |
|
"logps/generated": -324.5755310058594, |
|
"logps/real": -213.75076293945312, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.916365146636963, |
|
"rewards/margins": 6.1400957107543945, |
|
"rewards/real": 2.22373104095459, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.556872037914691e-08, |
|
"logits/generated": 6.564489841461182, |
|
"logits/real": 4.871767997741699, |
|
"logps/generated": -308.17706298828125, |
|
"logps/real": -221.3224639892578, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.5159783363342285, |
|
"rewards/margins": 5.63589334487915, |
|
"rewards/real": 2.11991548538208, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4383886255924165e-08, |
|
"logits/generated": 6.402759552001953, |
|
"logits/real": 4.591031074523926, |
|
"logps/generated": -328.0219421386719, |
|
"logps/real": -236.8424072265625, |
|
"loss": 0.0356, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7980494499206543, |
|
"rewards/margins": 6.346069812774658, |
|
"rewards/real": 2.5480198860168457, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.319905213270142e-08, |
|
"logits/generated": 6.591435432434082, |
|
"logits/real": 4.606555938720703, |
|
"logps/generated": -303.12579345703125, |
|
"logps/real": -228.23226928710938, |
|
"loss": 0.0447, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5328078269958496, |
|
"rewards/margins": 6.194054126739502, |
|
"rewards/real": 2.6612462997436523, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.201421800947867e-08, |
|
"logits/generated": 6.483080863952637, |
|
"logits/real": 4.758042335510254, |
|
"logps/generated": -307.1785583496094, |
|
"logps/real": -221.1710662841797, |
|
"loss": 0.0277, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7217071056365967, |
|
"rewards/margins": 6.27499532699585, |
|
"rewards/real": 2.553287982940674, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.082938388625592e-08, |
|
"logits/generated": 6.502976417541504, |
|
"logits/real": 4.846578121185303, |
|
"logps/generated": -317.02081298828125, |
|
"logps/real": -225.8799591064453, |
|
"loss": 0.037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6737067699432373, |
|
"rewards/margins": 5.691414833068848, |
|
"rewards/real": 2.0177078247070312, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.964454976303317e-08, |
|
"logits/generated": 6.714502811431885, |
|
"logits/real": 4.4899725914001465, |
|
"logps/generated": -320.1687316894531, |
|
"logps/real": -220.10842895507812, |
|
"loss": 0.032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.954611301422119, |
|
"rewards/margins": 6.4107770919799805, |
|
"rewards/real": 2.4561660289764404, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.845971563981042e-08, |
|
"logits/generated": 6.47867488861084, |
|
"logits/real": 4.510740756988525, |
|
"logps/generated": -313.6661682128906, |
|
"logps/real": -206.0166015625, |
|
"loss": 0.0378, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6164774894714355, |
|
"rewards/margins": 5.649336814880371, |
|
"rewards/real": 2.0328593254089355, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.7274881516587676e-08, |
|
"logits/generated": 6.634936332702637, |
|
"logits/real": 4.846175670623779, |
|
"logps/generated": -327.55364990234375, |
|
"logps/real": -215.91610717773438, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.14184045791626, |
|
"rewards/margins": 6.476358890533447, |
|
"rewards/real": 2.3345181941986084, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.609004739336492e-08, |
|
"logits/generated": 6.581854820251465, |
|
"logits/real": 4.857382774353027, |
|
"logps/generated": -305.4502258300781, |
|
"logps/real": -209.5597381591797, |
|
"loss": 0.0448, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.655374526977539, |
|
"rewards/margins": 6.246061325073242, |
|
"rewards/real": 2.5906870365142822, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.4905213270142176e-08, |
|
"logits/generated": 6.5722198486328125, |
|
"logits/real": 4.7393646240234375, |
|
"logps/generated": -326.43719482421875, |
|
"logps/real": -233.70809936523438, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.8187355995178223, |
|
"rewards/margins": 6.1232428550720215, |
|
"rewards/real": 2.3045077323913574, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.372037914691943e-08, |
|
"logits/generated": 6.555207252502441, |
|
"logits/real": 4.758434295654297, |
|
"logps/generated": -326.94232177734375, |
|
"logps/real": -235.101806640625, |
|
"loss": 0.025, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6171951293945312, |
|
"rewards/margins": 6.253279685974121, |
|
"rewards/real": 2.636084794998169, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.253554502369668e-08, |
|
"logits/generated": 6.525106906890869, |
|
"logits/real": 5.279546737670898, |
|
"logps/generated": -331.38818359375, |
|
"logps/real": -274.1283264160156, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6407923698425293, |
|
"rewards/margins": 6.059579372406006, |
|
"rewards/real": 2.4187867641448975, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.135071090047393e-08, |
|
"logits/generated": 6.604770660400391, |
|
"logits/real": 4.898898601531982, |
|
"logps/generated": -330.6890563964844, |
|
"logps/real": -254.55038452148438, |
|
"loss": 0.0362, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.8300297260284424, |
|
"rewards/margins": 6.490457057952881, |
|
"rewards/real": 2.6604273319244385, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.016587677725118e-08, |
|
"logits/generated": 6.4878034591674805, |
|
"logits/real": 4.653387069702148, |
|
"logps/generated": -306.11895751953125, |
|
"logps/real": -233.1724395751953, |
|
"loss": 0.0334, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7691116333007812, |
|
"rewards/margins": 6.210760593414307, |
|
"rewards/real": 2.441648006439209, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.8981042654028434e-08, |
|
"logits/generated": 6.604249477386475, |
|
"logits/real": 4.4642558097839355, |
|
"logps/generated": -328.152099609375, |
|
"logps/real": -215.21151733398438, |
|
"loss": 0.0333, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.84818696975708, |
|
"rewards/margins": 6.3337178230285645, |
|
"rewards/real": 2.485531806945801, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.779620853080569e-08, |
|
"logits/generated": 6.567579746246338, |
|
"logits/real": 4.3342695236206055, |
|
"logps/generated": -319.4460144042969, |
|
"logps/real": -200.232177734375, |
|
"loss": 0.0331, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.974278211593628, |
|
"rewards/margins": 6.716505527496338, |
|
"rewards/real": 2.7422266006469727, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.661137440758294e-08, |
|
"logits/generated": 6.469827175140381, |
|
"logits/real": 4.755372047424316, |
|
"logps/generated": -321.1123046875, |
|
"logps/real": -220.9445343017578, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.819784641265869, |
|
"rewards/margins": 6.050178050994873, |
|
"rewards/real": 2.230393171310425, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.5426540284360186e-08, |
|
"logits/generated": 6.43978214263916, |
|
"logits/real": 5.01400899887085, |
|
"logps/generated": -317.6650390625, |
|
"logps/real": -221.50173950195312, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7374751567840576, |
|
"rewards/margins": 6.025723934173584, |
|
"rewards/real": 2.2882485389709473, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.424170616113744e-08, |
|
"logits/generated": 6.483642578125, |
|
"logits/real": 4.6836934089660645, |
|
"logps/generated": -328.49017333984375, |
|
"logps/real": -233.32470703125, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.040391445159912, |
|
"rewards/margins": 6.6246747970581055, |
|
"rewards/real": 2.5842835903167725, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.305687203791469e-08, |
|
"logits/generated": 6.557009220123291, |
|
"logits/real": 4.679731369018555, |
|
"logps/generated": -314.7176513671875, |
|
"logps/real": -220.220458984375, |
|
"loss": 0.0218, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6416420936584473, |
|
"rewards/margins": 6.076503753662109, |
|
"rewards/real": 2.4348621368408203, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.1872037914691945e-08, |
|
"logits/generated": 6.50725793838501, |
|
"logits/real": 4.735629558563232, |
|
"logps/generated": -319.73260498046875, |
|
"logps/real": -205.85287475585938, |
|
"loss": 0.0332, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5968334674835205, |
|
"rewards/margins": 5.893436908721924, |
|
"rewards/real": 2.2966036796569824, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.068720379146919e-08, |
|
"logits/generated": 6.5144171714782715, |
|
"logits/real": 4.513826847076416, |
|
"logps/generated": -318.6091613769531, |
|
"logps/real": -218.3349151611328, |
|
"loss": 0.041, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7594285011291504, |
|
"rewards/margins": 6.054383754730225, |
|
"rewards/real": 2.2949557304382324, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9502369668246444e-08, |
|
"logits/generated": 6.47725772857666, |
|
"logits/real": 4.8379034996032715, |
|
"logps/generated": -318.28778076171875, |
|
"logps/real": -225.16415405273438, |
|
"loss": 0.0263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.90997052192688, |
|
"rewards/margins": 6.520627021789551, |
|
"rewards/real": 2.6106560230255127, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.8317535545023697e-08, |
|
"logits/generated": 6.458238124847412, |
|
"logits/real": 4.9540228843688965, |
|
"logps/generated": -315.47100830078125, |
|
"logps/real": -216.9081268310547, |
|
"loss": 0.0361, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.684654712677002, |
|
"rewards/margins": 6.162962436676025, |
|
"rewards/real": 2.4783077239990234, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.7132701421800947e-08, |
|
"logits/generated": 6.424310207366943, |
|
"logits/real": 4.891389846801758, |
|
"logps/generated": -316.9339904785156, |
|
"logps/real": -235.1954345703125, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7844338417053223, |
|
"rewards/margins": 6.09124755859375, |
|
"rewards/real": 2.306814193725586, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.59478672985782e-08, |
|
"logits/generated": 6.582629203796387, |
|
"logits/real": 4.89013671875, |
|
"logps/generated": -328.97601318359375, |
|
"logps/real": -229.2420654296875, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.919386386871338, |
|
"rewards/margins": 6.390198707580566, |
|
"rewards/real": 2.4708125591278076, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.476303317535545e-08, |
|
"logits/generated": 6.474552154541016, |
|
"logits/real": 4.8046979904174805, |
|
"logps/generated": -309.959716796875, |
|
"logps/real": -234.17251586914062, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -3.949699878692627, |
|
"rewards/margins": 6.123934745788574, |
|
"rewards/real": 2.1742348670959473, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3578199052132702e-08, |
|
"logits/generated": 6.575322151184082, |
|
"logits/real": 4.4408369064331055, |
|
"logps/generated": -315.2879333496094, |
|
"logps/real": -226.00564575195312, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.301480770111084, |
|
"rewards/margins": 5.636345863342285, |
|
"rewards/real": 2.3348641395568848, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.239336492890995e-08, |
|
"logits/generated": 6.517224311828613, |
|
"logits/real": 4.958826065063477, |
|
"logps/generated": -314.48382568359375, |
|
"logps/real": -232.69461059570312, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.824894428253174, |
|
"rewards/margins": 6.043306827545166, |
|
"rewards/real": 2.2184131145477295, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1208530805687202e-08, |
|
"logits/generated": 6.625432014465332, |
|
"logits/real": 4.315027713775635, |
|
"logps/generated": -319.8591003417969, |
|
"logps/real": -203.87966918945312, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.930284023284912, |
|
"rewards/margins": 6.055028915405273, |
|
"rewards/real": 2.1247451305389404, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.002369668246445e-08, |
|
"logits/generated": 6.50540828704834, |
|
"logits/real": 5.131866455078125, |
|
"logps/generated": -318.6473693847656, |
|
"logps/real": -250.11904907226562, |
|
"loss": 0.0387, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9697766304016113, |
|
"rewards/margins": 5.962163925170898, |
|
"rewards/real": 1.9923874139785767, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8838862559241704e-08, |
|
"logits/generated": 6.527606964111328, |
|
"logits/real": 4.929044723510742, |
|
"logps/generated": -307.16680908203125, |
|
"logps/real": -220.23678588867188, |
|
"loss": 0.0262, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.7975895404815674, |
|
"rewards/margins": 6.474527835845947, |
|
"rewards/real": 2.67693829536438, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7654028436018954e-08, |
|
"logits/generated": 6.473954677581787, |
|
"logits/real": 4.672419548034668, |
|
"logps/generated": -316.3739013671875, |
|
"logps/real": -218.72024536132812, |
|
"loss": 0.0253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7095909118652344, |
|
"rewards/margins": 6.246824741363525, |
|
"rewards/real": 2.537233829498291, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6469194312796207e-08, |
|
"logits/generated": 6.356590747833252, |
|
"logits/real": 5.3553056716918945, |
|
"logps/generated": -313.9323425292969, |
|
"logps/real": -238.56777954101562, |
|
"loss": 0.0336, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.027235984802246, |
|
"rewards/margins": 6.5537543296813965, |
|
"rewards/real": 2.5265183448791504, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.528436018957346e-08, |
|
"logits/generated": 6.637836456298828, |
|
"logits/real": 4.594275951385498, |
|
"logps/generated": -310.762939453125, |
|
"logps/real": -227.982177734375, |
|
"loss": 0.0364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.674046754837036, |
|
"rewards/margins": 6.259788990020752, |
|
"rewards/real": 2.585742473602295, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.409952606635071e-08, |
|
"logits/generated": 6.587820529937744, |
|
"logits/real": 4.828711032867432, |
|
"logps/generated": -322.9399719238281, |
|
"logps/real": -231.203857421875, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.4187331199645996, |
|
"rewards/margins": 5.429445266723633, |
|
"rewards/real": 2.0107123851776123, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2914691943127961e-08, |
|
"logits/generated": 6.576046943664551, |
|
"logits/real": 4.6913862228393555, |
|
"logps/generated": -310.24871826171875, |
|
"logps/real": -215.4926300048828, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.60530424118042, |
|
"rewards/margins": 5.837168216705322, |
|
"rewards/real": 2.2318644523620605, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1729857819905212e-08, |
|
"logits/generated": 6.390922546386719, |
|
"logits/real": 5.047102451324463, |
|
"logps/generated": -316.705322265625, |
|
"logps/real": -221.8660125732422, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.7050914764404297, |
|
"rewards/margins": 6.110095500946045, |
|
"rewards/real": 2.4050049781799316, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0545023696682464e-08, |
|
"logits/generated": 6.420653343200684, |
|
"logits/real": 4.74511194229126, |
|
"logps/generated": -317.0543212890625, |
|
"logps/real": -218.3509979248047, |
|
"loss": 0.0397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9743943214416504, |
|
"rewards/margins": 6.493072509765625, |
|
"rewards/real": 2.5186777114868164, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.360189573459715e-09, |
|
"logits/generated": 6.5571417808532715, |
|
"logits/real": 4.7767133712768555, |
|
"logps/generated": -309.4183349609375, |
|
"logps/real": -237.371826171875, |
|
"loss": 0.0329, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3121707439422607, |
|
"rewards/margins": 5.517508506774902, |
|
"rewards/real": 2.2053380012512207, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.175355450236966e-09, |
|
"logits/generated": 6.605482578277588, |
|
"logits/real": 4.707463264465332, |
|
"logps/generated": -327.2000427246094, |
|
"logps/real": -206.5057373046875, |
|
"loss": 0.0279, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.9358227252960205, |
|
"rewards/margins": 6.49956750869751, |
|
"rewards/real": 2.56374454498291, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.990521327014218e-09, |
|
"logits/generated": 6.495814323425293, |
|
"logits/real": 4.9706573486328125, |
|
"logps/generated": -296.98992919921875, |
|
"logps/real": -226.7158966064453, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.4257378578186035, |
|
"rewards/margins": 5.643680572509766, |
|
"rewards/real": 2.217942714691162, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.805687203791469e-09, |
|
"logits/generated": 6.507977485656738, |
|
"logits/real": 4.830941200256348, |
|
"logps/generated": -309.84075927734375, |
|
"logps/real": -210.8922119140625, |
|
"loss": 0.0384, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.37202525138855, |
|
"rewards/margins": 5.505019187927246, |
|
"rewards/real": 2.1329941749572754, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.62085308056872e-09, |
|
"logits/generated": 6.521729469299316, |
|
"logits/real": 4.934253215789795, |
|
"logps/generated": -309.5820007324219, |
|
"logps/real": -222.89987182617188, |
|
"loss": 0.0527, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6575589179992676, |
|
"rewards/margins": 5.68410062789917, |
|
"rewards/real": 2.0265424251556396, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.4360189573459714e-09, |
|
"logits/generated": 6.571385860443115, |
|
"logits/real": 4.746085166931152, |
|
"logps/generated": -322.11639404296875, |
|
"logps/real": -239.91476440429688, |
|
"loss": 0.0411, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9839770793914795, |
|
"rewards/margins": 6.489884376525879, |
|
"rewards/real": 2.5059072971343994, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.2511848341232227e-09, |
|
"logits/generated": 6.565645694732666, |
|
"logits/real": 4.96927547454834, |
|
"logps/generated": -318.505859375, |
|
"logps/real": -256.3047180175781, |
|
"loss": 0.0367, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.7661774158477783, |
|
"rewards/margins": 5.747965335845947, |
|
"rewards/real": 1.981787919998169, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.0663507109004738e-09, |
|
"logits/generated": 6.504978179931641, |
|
"logits/real": 4.623128414154053, |
|
"logps/generated": -328.41619873046875, |
|
"logps/real": -214.21871948242188, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.7540199756622314, |
|
"rewards/margins": 5.994838237762451, |
|
"rewards/real": 2.2408182621002197, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4689, |
|
"total_flos": 0.0, |
|
"train_loss": 0.16397903456657928, |
|
"train_runtime": 29965.1509, |
|
"train_samples_per_second": 5.006, |
|
"train_steps_per_second": 0.156 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4689, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|