|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 5000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 3.9086405364003305, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -0.9392852187156677, |
|
"logits/rejected": -0.9925774335861206, |
|
"logps/chosen": -164.85171508789062, |
|
"logps/rejected": -169.34266662597656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 4.318184225673836, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.8653285503387451, |
|
"logits/rejected": -1.0646977424621582, |
|
"logps/chosen": -367.5494384765625, |
|
"logps/rejected": -308.0057067871094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3611111044883728, |
|
"rewards/chosen": 0.00055171106941998, |
|
"rewards/margins": 0.00021127487707417458, |
|
"rewards/rejected": 0.0003404362651053816, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 4.384399942785772, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.9145099520683289, |
|
"logits/rejected": -0.9615824818611145, |
|
"logps/chosen": -254.70645141601562, |
|
"logps/rejected": -225.65023803710938, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0004928931593894958, |
|
"rewards/margins": 0.0004294395330362022, |
|
"rewards/rejected": 6.345368456095457e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 4.1919489271249395, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -1.0393908023834229, |
|
"logits/rejected": -1.1211938858032227, |
|
"logps/chosen": -247.6179962158203, |
|
"logps/rejected": -250.74832153320312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0005728682735934854, |
|
"rewards/margins": -0.0005012283218093216, |
|
"rewards/rejected": -7.164000999182463e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 4.043349234918003, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -1.0382745265960693, |
|
"logits/rejected": -1.1404989957809448, |
|
"logps/chosen": -246.5960693359375, |
|
"logps/rejected": -238.99038696289062, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0003935880376957357, |
|
"rewards/margins": 0.0007454471779055893, |
|
"rewards/rejected": -0.001139035215601325, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 4.337621377747828, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.9566876292228699, |
|
"logits/rejected": -1.0265729427337646, |
|
"logps/chosen": -273.5587463378906, |
|
"logps/rejected": -238.2271728515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0002776079345494509, |
|
"rewards/margins": -0.0013704797020182014, |
|
"rewards/rejected": 0.0010928716510534286, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 4.332693802131573, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -1.14139723777771, |
|
"logits/rejected": -1.063253402709961, |
|
"logps/chosen": -291.4471130371094, |
|
"logps/rejected": -265.26800537109375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0003930005768779665, |
|
"rewards/margins": 0.00029442697996273637, |
|
"rewards/rejected": 9.857374243438244e-05, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 3.9392376744722797, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.7830671072006226, |
|
"logits/rejected": -0.8284071087837219, |
|
"logps/chosen": -280.4967346191406, |
|
"logps/rejected": -269.8634033203125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00010640527762006968, |
|
"rewards/margins": -0.00039604370249435306, |
|
"rewards/rejected": 0.00028963852673768997, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 4.275834970816185, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -1.1247626543045044, |
|
"logits/rejected": -0.8464676141738892, |
|
"logps/chosen": -203.01101684570312, |
|
"logps/rejected": -241.64547729492188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00033823197009041905, |
|
"rewards/margins": 0.0008404625696130097, |
|
"rewards/rejected": -0.0005022305413149297, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 4.009980090025205, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -1.128251552581787, |
|
"logits/rejected": -1.1966060400009155, |
|
"logps/chosen": -348.4684143066406, |
|
"logps/rejected": -300.92156982421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.00025963489315472543, |
|
"rewards/margins": 0.0006502953474409878, |
|
"rewards/rejected": -0.0003906603087671101, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 4.278362574458803, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.8752719759941101, |
|
"logits/rejected": -0.7615184783935547, |
|
"logps/chosen": -262.26171875, |
|
"logps/rejected": -279.4682312011719, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0001237258838955313, |
|
"rewards/margins": -0.00013651838526129723, |
|
"rewards/rejected": 1.2792646884918213e-05, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 3.7292949735641874, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -1.054966688156128, |
|
"logits/rejected": -1.089815616607666, |
|
"logps/chosen": -232.7165069580078, |
|
"logps/rejected": -230.30648803710938, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0005149660282768309, |
|
"rewards/margins": 0.0010742491576820612, |
|
"rewards/rejected": -0.0015892151277512312, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 4.185972544798998, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.9251031875610352, |
|
"logits/rejected": -1.0560011863708496, |
|
"logps/chosen": -302.79620361328125, |
|
"logps/rejected": -279.6351013183594, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0033786073327064514, |
|
"rewards/margins": -0.0015979796880856156, |
|
"rewards/rejected": -0.0017806284595280886, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 3.7577616139381282, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -1.1069813966751099, |
|
"logits/rejected": -1.0163028240203857, |
|
"logps/chosen": -225.87887573242188, |
|
"logps/rejected": -308.16943359375, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0021240042988210917, |
|
"rewards/margins": 0.0019912621937692165, |
|
"rewards/rejected": -0.004115266725420952, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 3.9602176902490616, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.8096126317977905, |
|
"logits/rejected": -0.844383716583252, |
|
"logps/chosen": -278.711181640625, |
|
"logps/rejected": -270.23455810546875, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0016858462477102876, |
|
"rewards/margins": 0.004555505700409412, |
|
"rewards/rejected": -0.0062413522973656654, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 4.198772547754269, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -1.0384037494659424, |
|
"logits/rejected": -1.0555726289749146, |
|
"logps/chosen": -231.3898468017578, |
|
"logps/rejected": -225.4952392578125, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0014209033688530326, |
|
"rewards/margins": 0.00509048905223608, |
|
"rewards/rejected": -0.006511392537504435, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 4.167240538791073, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.8518667221069336, |
|
"logits/rejected": -0.9568248987197876, |
|
"logps/chosen": -296.21734619140625, |
|
"logps/rejected": -231.2320098876953, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.004124562256038189, |
|
"rewards/margins": 0.004930226132273674, |
|
"rewards/rejected": -0.009054789319634438, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 3.692310549028015, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.8354592323303223, |
|
"logits/rejected": -0.8758047819137573, |
|
"logps/chosen": -342.7477111816406, |
|
"logps/rejected": -333.38189697265625, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.004680985119193792, |
|
"rewards/margins": 0.004498300142586231, |
|
"rewards/rejected": -0.009179284796118736, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 4.32405896978214, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -1.1283237934112549, |
|
"logits/rejected": -1.1168252229690552, |
|
"logps/chosen": -238.8912353515625, |
|
"logps/rejected": -229.00265502929688, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.004386520944535732, |
|
"rewards/margins": 0.007528006099164486, |
|
"rewards/rejected": -0.011914527975022793, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 4.666604305995101, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.9106446504592896, |
|
"logits/rejected": -0.9879466891288757, |
|
"logps/chosen": -306.4612121582031, |
|
"logps/rejected": -249.0087890625, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.006806717719882727, |
|
"rewards/margins": 0.007770798169076443, |
|
"rewards/rejected": -0.014577515423297882, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 3.9918872126977574, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.9901522397994995, |
|
"logits/rejected": -0.928848385810852, |
|
"logps/chosen": -313.17681884765625, |
|
"logps/rejected": -297.6922302246094, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.010936335660517216, |
|
"rewards/margins": 0.006564898882061243, |
|
"rewards/rejected": -0.017501235008239746, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 4.040620494299144, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.9027220606803894, |
|
"logits/rejected": -0.922700047492981, |
|
"logps/chosen": -230.9945831298828, |
|
"logps/rejected": -255.6648712158203, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.010718774050474167, |
|
"rewards/margins": 0.009615534916520119, |
|
"rewards/rejected": -0.020334308966994286, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 4.002893355744946, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.8777509927749634, |
|
"logits/rejected": -0.9541767239570618, |
|
"logps/chosen": -312.22064208984375, |
|
"logps/rejected": -314.44476318359375, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.018286144360899925, |
|
"rewards/margins": 0.010097065940499306, |
|
"rewards/rejected": -0.02838321030139923, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 4.397529514704007, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.8843205571174622, |
|
"logits/rejected": -0.7930720448493958, |
|
"logps/chosen": -240.90969848632812, |
|
"logps/rejected": -279.2537841796875, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.013729272410273552, |
|
"rewards/margins": 0.02465725876390934, |
|
"rewards/rejected": -0.03838653117418289, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 4.676021615108152, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -1.0245535373687744, |
|
"logits/rejected": -1.0780936479568481, |
|
"logps/chosen": -303.7603454589844, |
|
"logps/rejected": -259.3138732910156, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.010991424322128296, |
|
"rewards/margins": 0.02684735879302025, |
|
"rewards/rejected": -0.037838783115148544, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 4.463678598202064, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.9612535238265991, |
|
"logits/rejected": -1.0222301483154297, |
|
"logps/chosen": -241.61404418945312, |
|
"logps/rejected": -236.07644653320312, |
|
"loss": 0.68, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.021920276805758476, |
|
"rewards/margins": 0.018481746315956116, |
|
"rewards/rejected": -0.04040202870965004, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 4.115268408123004, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -1.0142881870269775, |
|
"logits/rejected": -0.8710586428642273, |
|
"logps/chosen": -263.0195617675781, |
|
"logps/rejected": -259.115478515625, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.021990353241562843, |
|
"rewards/margins": 0.03786135092377663, |
|
"rewards/rejected": -0.05985169857740402, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 4.201440552672297, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.9422982931137085, |
|
"logits/rejected": -1.1441442966461182, |
|
"logps/chosen": -290.52044677734375, |
|
"logps/rejected": -235.48049926757812, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.028027933090925217, |
|
"rewards/margins": 0.016811534762382507, |
|
"rewards/rejected": -0.044839464128017426, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 4.458570754919466, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -1.0597388744354248, |
|
"logits/rejected": -1.0107687711715698, |
|
"logps/chosen": -299.5600891113281, |
|
"logps/rejected": -287.019287109375, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.03363611549139023, |
|
"rewards/margins": 0.046159304678440094, |
|
"rewards/rejected": -0.07979541271924973, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 3.937791865544782, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.9224345088005066, |
|
"logits/rejected": -0.8189966082572937, |
|
"logps/chosen": -291.11492919921875, |
|
"logps/rejected": -267.08172607421875, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04258224740624428, |
|
"rewards/margins": 0.05275702476501465, |
|
"rewards/rejected": -0.09533928334712982, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 4.881987417549259, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -1.013051152229309, |
|
"logits/rejected": -1.0528075695037842, |
|
"logps/chosen": -260.43798828125, |
|
"logps/rejected": -280.9106750488281, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.061515528708696365, |
|
"rewards/margins": 0.059713393449783325, |
|
"rewards/rejected": -0.12122891843318939, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 3.887121301077397, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.8997815847396851, |
|
"logits/rejected": -0.9898494482040405, |
|
"logps/chosen": -277.71112060546875, |
|
"logps/rejected": -239.8883514404297, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.058370210230350494, |
|
"rewards/margins": 0.08987968415021896, |
|
"rewards/rejected": -0.14824989438056946, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 3.767825032617774, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -1.0142638683319092, |
|
"logits/rejected": -1.0835198163986206, |
|
"logps/chosen": -283.21722412109375, |
|
"logps/rejected": -256.9454040527344, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0824267640709877, |
|
"rewards/margins": 0.06453671306371689, |
|
"rewards/rejected": -0.1469634771347046, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 4.101417109334993, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.8915877342224121, |
|
"logits/rejected": -0.7875005006790161, |
|
"logps/chosen": -256.7735595703125, |
|
"logps/rejected": -309.68743896484375, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13278505206108093, |
|
"rewards/margins": 0.12722721695899963, |
|
"rewards/rejected": -0.26001226902008057, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 4.184212161663267, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.8056583404541016, |
|
"logits/rejected": -0.8734966516494751, |
|
"logps/chosen": -257.9537353515625, |
|
"logps/rejected": -290.716552734375, |
|
"loss": 0.6385, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15207326412200928, |
|
"rewards/margins": 0.15626882016658783, |
|
"rewards/rejected": -0.3083421289920807, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 5.07860606766039, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -1.1134240627288818, |
|
"logits/rejected": -1.1105023622512817, |
|
"logps/chosen": -308.1408386230469, |
|
"logps/rejected": -322.1429748535156, |
|
"loss": 0.6317, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.37560468912124634, |
|
"rewards/margins": 0.14869387447834015, |
|
"rewards/rejected": -0.5242985486984253, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 6.681443438336797, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.9821847677230835, |
|
"logits/rejected": -1.1448824405670166, |
|
"logps/chosen": -324.4971618652344, |
|
"logps/rejected": -285.352294921875, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5944857001304626, |
|
"rewards/margins": 0.16079989075660706, |
|
"rewards/rejected": -0.7552856206893921, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 5.547129015189758, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.9680454134941101, |
|
"logits/rejected": -0.8957147598266602, |
|
"logps/chosen": -285.62225341796875, |
|
"logps/rejected": -329.6009826660156, |
|
"loss": 0.6155, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.507738471031189, |
|
"rewards/margins": 0.350941002368927, |
|
"rewards/rejected": -0.858679473400116, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 5.005904357466744, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -1.0903767347335815, |
|
"logits/rejected": -1.0195515155792236, |
|
"logps/chosen": -279.7075500488281, |
|
"logps/rejected": -327.0304870605469, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.42896413803100586, |
|
"rewards/margins": 0.4645144045352936, |
|
"rewards/rejected": -0.8934786915779114, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 6.3368430821651085, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.9331681132316589, |
|
"logits/rejected": -0.974704384803772, |
|
"logps/chosen": -358.29052734375, |
|
"logps/rejected": -354.2314147949219, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9378200769424438, |
|
"rewards/margins": 0.23260822892189026, |
|
"rewards/rejected": -1.1704282760620117, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 4.993194981103021, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.9397958517074585, |
|
"logits/rejected": -1.0308669805526733, |
|
"logps/chosen": -327.28485107421875, |
|
"logps/rejected": -373.20538330078125, |
|
"loss": 0.6133, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6638423800468445, |
|
"rewards/margins": 0.6587511897087097, |
|
"rewards/rejected": -1.3225935697555542, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 5.627853897652041, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -1.0697743892669678, |
|
"logits/rejected": -1.02415931224823, |
|
"logps/chosen": -302.128173828125, |
|
"logps/rejected": -373.0263366699219, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.543385922908783, |
|
"rewards/margins": 0.5250757932662964, |
|
"rewards/rejected": -1.0684617757797241, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 7.180880498197233, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -0.9345219731330872, |
|
"logits/rejected": -0.9572717547416687, |
|
"logps/chosen": -444.15997314453125, |
|
"logps/rejected": -470.5298767089844, |
|
"loss": 0.6004, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.090321660041809, |
|
"rewards/margins": 0.609116792678833, |
|
"rewards/rejected": -1.6994386911392212, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 11.482376711377093, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -1.1403158903121948, |
|
"logits/rejected": -1.0577712059020996, |
|
"logps/chosen": -349.0794982910156, |
|
"logps/rejected": -424.08642578125, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9095686078071594, |
|
"rewards/margins": 0.863958477973938, |
|
"rewards/rejected": -1.773526906967163, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 5.363358928784856, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -0.8907009363174438, |
|
"logits/rejected": -0.991034984588623, |
|
"logps/chosen": -319.98046875, |
|
"logps/rejected": -347.8736877441406, |
|
"loss": 0.5812, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6185662150382996, |
|
"rewards/margins": 0.3348569869995117, |
|
"rewards/rejected": -0.9534232020378113, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 5.047279311794404, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -1.0067179203033447, |
|
"logits/rejected": -1.0287652015686035, |
|
"logps/chosen": -324.7417907714844, |
|
"logps/rejected": -358.2411804199219, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.852948784828186, |
|
"rewards/margins": 0.47164326906204224, |
|
"rewards/rejected": -1.324592113494873, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 11.251777016471365, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -0.9187090992927551, |
|
"logits/rejected": -0.9824856519699097, |
|
"logps/chosen": -357.00994873046875, |
|
"logps/rejected": -421.2936096191406, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7373157739639282, |
|
"rewards/margins": 0.7148237824440002, |
|
"rewards/rejected": -1.4521396160125732, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 8.58525552938624, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -1.0368196964263916, |
|
"logits/rejected": -1.0864078998565674, |
|
"logps/chosen": -331.54705810546875, |
|
"logps/rejected": -414.0245666503906, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0620375871658325, |
|
"rewards/margins": 0.7675203084945679, |
|
"rewards/rejected": -1.82955801486969, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 7.973906104493475, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -1.2711211442947388, |
|
"logits/rejected": -1.1430588960647583, |
|
"logps/chosen": -344.3868408203125, |
|
"logps/rejected": -475.3819885253906, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9513166546821594, |
|
"rewards/margins": 0.8125057220458984, |
|
"rewards/rejected": -1.7638225555419922, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 7.29273708493132, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -0.945563793182373, |
|
"logits/rejected": -0.9637011289596558, |
|
"logps/chosen": -333.838134765625, |
|
"logps/rejected": -502.4449157714844, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9119874238967896, |
|
"rewards/margins": 1.7599906921386719, |
|
"rewards/rejected": -2.671978235244751, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 8.408190304146231, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -0.9354039430618286, |
|
"logits/rejected": -1.008681297302246, |
|
"logps/chosen": -389.75677490234375, |
|
"logps/rejected": -416.6995544433594, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0735552310943604, |
|
"rewards/margins": 0.6031023263931274, |
|
"rewards/rejected": -1.6766574382781982, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 7.547972255202871, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -1.0642093420028687, |
|
"logits/rejected": -1.0110970735549927, |
|
"logps/chosen": -349.2254333496094, |
|
"logps/rejected": -501.6767578125, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9027541875839233, |
|
"rewards/margins": 1.5682001113891602, |
|
"rewards/rejected": -2.470954418182373, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 6.11542459949028, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -0.8702675104141235, |
|
"logits/rejected": -1.03139066696167, |
|
"logps/chosen": -410.02264404296875, |
|
"logps/rejected": -504.4461364746094, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.188328742980957, |
|
"rewards/margins": 1.0218807458877563, |
|
"rewards/rejected": -2.210209608078003, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 9.452659503317662, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -1.1504271030426025, |
|
"logits/rejected": -1.0904886722564697, |
|
"logps/chosen": -299.77197265625, |
|
"logps/rejected": -411.39520263671875, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7923839092254639, |
|
"rewards/margins": 0.7800144553184509, |
|
"rewards/rejected": -1.5723984241485596, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 15.54174844903523, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -1.0087683200836182, |
|
"logits/rejected": -1.0048372745513916, |
|
"logps/chosen": -348.1635437011719, |
|
"logps/rejected": -465.615234375, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.949160099029541, |
|
"rewards/margins": 1.2487725019454956, |
|
"rewards/rejected": -2.197932720184326, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 6.811166436392921, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -0.8339638710021973, |
|
"logits/rejected": -0.8557635545730591, |
|
"logps/chosen": -368.79998779296875, |
|
"logps/rejected": -455.84423828125, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7133311629295349, |
|
"rewards/margins": 0.8222800493240356, |
|
"rewards/rejected": -1.5356113910675049, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 8.38746715084373, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -0.9729937314987183, |
|
"logits/rejected": -1.1065596342086792, |
|
"logps/chosen": -269.25994873046875, |
|
"logps/rejected": -294.3720703125, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6644600629806519, |
|
"rewards/margins": 0.42904800176620483, |
|
"rewards/rejected": -1.0935081243515015, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 24.438576955287925, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -0.8234192132949829, |
|
"logits/rejected": -0.8349748849868774, |
|
"logps/chosen": -419.22637939453125, |
|
"logps/rejected": -574.6170654296875, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4598208665847778, |
|
"rewards/margins": 1.224372148513794, |
|
"rewards/rejected": -2.6841928958892822, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 8.10566134085291, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -1.0753071308135986, |
|
"logits/rejected": -1.1201988458633423, |
|
"logps/chosen": -420.6094665527344, |
|
"logps/rejected": -514.7296142578125, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6122499704360962, |
|
"rewards/margins": 1.0379631519317627, |
|
"rewards/rejected": -2.6502132415771484, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 6.442606806291236, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -0.7010489702224731, |
|
"logits/rejected": -0.7692248225212097, |
|
"logps/chosen": -344.9544982910156, |
|
"logps/rejected": -453.3834533691406, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.21074640750885, |
|
"rewards/margins": 1.0821633338928223, |
|
"rewards/rejected": -2.292909860610962, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 8.67810882207825, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -0.9984515905380249, |
|
"logits/rejected": -1.0792922973632812, |
|
"logps/chosen": -385.07855224609375, |
|
"logps/rejected": -431.91748046875, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8377297520637512, |
|
"rewards/margins": 0.9412263631820679, |
|
"rewards/rejected": -1.7789561748504639, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 19.243246370458866, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -0.9357202649116516, |
|
"logits/rejected": -0.966041088104248, |
|
"logps/chosen": -379.83709716796875, |
|
"logps/rejected": -436.16021728515625, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2088489532470703, |
|
"rewards/margins": 0.8029053807258606, |
|
"rewards/rejected": -2.011754274368286, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 15.946020238913784, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -0.836955726146698, |
|
"logits/rejected": -0.9188618659973145, |
|
"logps/chosen": -472.018798828125, |
|
"logps/rejected": -462.2901306152344, |
|
"loss": 0.5215, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2413372993469238, |
|
"rewards/margins": 0.8713501691818237, |
|
"rewards/rejected": -2.112687587738037, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 7.979287445807055, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -1.00057053565979, |
|
"logits/rejected": -1.0442100763320923, |
|
"logps/chosen": -290.08868408203125, |
|
"logps/rejected": -377.97027587890625, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6758447885513306, |
|
"rewards/margins": 0.8780001401901245, |
|
"rewards/rejected": -1.5538448095321655, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 10.945616218615863, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -0.978852391242981, |
|
"logits/rejected": -0.8796356916427612, |
|
"logps/chosen": -318.9521484375, |
|
"logps/rejected": -465.2935485839844, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8167131543159485, |
|
"rewards/margins": 1.364931344985962, |
|
"rewards/rejected": -2.1816444396972656, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 12.631776767430628, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -0.9039742350578308, |
|
"logits/rejected": -0.9406811594963074, |
|
"logps/chosen": -403.57562255859375, |
|
"logps/rejected": -478.9420471191406, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3750524520874023, |
|
"rewards/margins": 0.8406556248664856, |
|
"rewards/rejected": -2.215708017349243, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 6.1260020762155145, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -1.0212910175323486, |
|
"logits/rejected": -1.0476784706115723, |
|
"logps/chosen": -298.73028564453125, |
|
"logps/rejected": -382.78021240234375, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6404946446418762, |
|
"rewards/margins": 0.9978824853897095, |
|
"rewards/rejected": -1.6383771896362305, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 9.288255831934693, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": -0.869672417640686, |
|
"logits/rejected": -0.9254360198974609, |
|
"logps/chosen": -365.64019775390625, |
|
"logps/rejected": -378.85113525390625, |
|
"loss": 0.5793, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5646086931228638, |
|
"rewards/margins": 0.3554513156414032, |
|
"rewards/rejected": -0.9200600385665894, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 12.016810333669639, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -0.8810294270515442, |
|
"logits/rejected": -0.9110749363899231, |
|
"logps/chosen": -320.2789001464844, |
|
"logps/rejected": -362.8446960449219, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6605560183525085, |
|
"rewards/margins": 0.6321157217025757, |
|
"rewards/rejected": -1.2926716804504395, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 9.358664871218739, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -0.9154292941093445, |
|
"logits/rejected": -0.9222054481506348, |
|
"logps/chosen": -362.84686279296875, |
|
"logps/rejected": -441.8236389160156, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.235999345779419, |
|
"rewards/margins": 0.7342005968093872, |
|
"rewards/rejected": -1.9701995849609375, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 9.152240736767983, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -1.0514498949050903, |
|
"logits/rejected": -1.0271055698394775, |
|
"logps/chosen": -301.6610107421875, |
|
"logps/rejected": -385.259521484375, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8431800603866577, |
|
"rewards/margins": 1.0639536380767822, |
|
"rewards/rejected": -1.90713369846344, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 12.567284708252304, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": -0.9216286540031433, |
|
"logits/rejected": -1.0931814908981323, |
|
"logps/chosen": -433.27130126953125, |
|
"logps/rejected": -560.4276123046875, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2827928066253662, |
|
"rewards/margins": 1.7925609350204468, |
|
"rewards/rejected": -3.0753538608551025, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 11.741747616486963, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -0.9665408134460449, |
|
"logits/rejected": -1.0536162853240967, |
|
"logps/chosen": -417.403076171875, |
|
"logps/rejected": -570.5277099609375, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7012180089950562, |
|
"rewards/margins": 1.562795877456665, |
|
"rewards/rejected": -3.2640137672424316, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 11.733941979044587, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -1.0246481895446777, |
|
"logits/rejected": -1.0158023834228516, |
|
"logps/chosen": -348.80133056640625, |
|
"logps/rejected": -439.29608154296875, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0424585342407227, |
|
"rewards/margins": 1.09738290309906, |
|
"rewards/rejected": -2.1398415565490723, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 8.84337454037855, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -0.8301714658737183, |
|
"logits/rejected": -0.9135535359382629, |
|
"logps/chosen": -330.27264404296875, |
|
"logps/rejected": -350.2643127441406, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6734127402305603, |
|
"rewards/margins": 0.41529732942581177, |
|
"rewards/rejected": -1.088710069656372, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 8.880233466088285, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -1.0296955108642578, |
|
"logits/rejected": -1.0950233936309814, |
|
"logps/chosen": -355.98968505859375, |
|
"logps/rejected": -452.5968322753906, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9854777455329895, |
|
"rewards/margins": 0.9587169885635376, |
|
"rewards/rejected": -1.9441944360733032, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 10.737134261298277, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -0.9651594161987305, |
|
"logits/rejected": -1.0333675146102905, |
|
"logps/chosen": -473.6534729003906, |
|
"logps/rejected": -615.0003662109375, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.580303430557251, |
|
"rewards/margins": 1.4406054019927979, |
|
"rewards/rejected": -3.020908832550049, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 10.188381001048064, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -1.1801836490631104, |
|
"logits/rejected": -1.2248878479003906, |
|
"logps/chosen": -438.201171875, |
|
"logps/rejected": -577.3571166992188, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3580670356750488, |
|
"rewards/margins": 1.4186818599700928, |
|
"rewards/rejected": -2.7767486572265625, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 13.185729895925714, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -1.06368887424469, |
|
"logits/rejected": -1.0469920635223389, |
|
"logps/chosen": -363.1687316894531, |
|
"logps/rejected": -502.82684326171875, |
|
"loss": 0.4839, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2150341272354126, |
|
"rewards/margins": 1.3010752201080322, |
|
"rewards/rejected": -2.5161094665527344, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 13.52232678239933, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -1.0281708240509033, |
|
"logits/rejected": -1.094001054763794, |
|
"logps/chosen": -321.34814453125, |
|
"logps/rejected": -453.59417724609375, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0169535875320435, |
|
"rewards/margins": 1.2799100875854492, |
|
"rewards/rejected": -2.2968640327453613, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 8.497366103850682, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -1.005568504333496, |
|
"logits/rejected": -0.9401241540908813, |
|
"logps/chosen": -398.41546630859375, |
|
"logps/rejected": -612.4130249023438, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.1582279205322266, |
|
"rewards/margins": 2.2199604511260986, |
|
"rewards/rejected": -3.378188371658325, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 17.573875043998388, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -1.0809205770492554, |
|
"logits/rejected": -1.1499931812286377, |
|
"logps/chosen": -387.76202392578125, |
|
"logps/rejected": -490.18243408203125, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.46564781665802, |
|
"rewards/margins": 1.2134864330291748, |
|
"rewards/rejected": -2.6791341304779053, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 8.204522849107846, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -0.8947283029556274, |
|
"logits/rejected": -0.9740638732910156, |
|
"logps/chosen": -393.2839660644531, |
|
"logps/rejected": -441.0948791503906, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2646175622940063, |
|
"rewards/margins": 0.5960845947265625, |
|
"rewards/rejected": -1.8607019186019897, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 10.142292221516385, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -1.053379774093628, |
|
"logits/rejected": -1.1101844310760498, |
|
"logps/chosen": -351.99090576171875, |
|
"logps/rejected": -449.01751708984375, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9777849316596985, |
|
"rewards/margins": 1.14426851272583, |
|
"rewards/rejected": -2.122053623199463, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 12.034416836065418, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -1.0946062803268433, |
|
"logits/rejected": -1.2976312637329102, |
|
"logps/chosen": -378.5492248535156, |
|
"logps/rejected": -474.7021484375, |
|
"loss": 0.5084, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2278741598129272, |
|
"rewards/margins": 1.197396159172058, |
|
"rewards/rejected": -2.4252700805664062, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 8.290745524509466, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": -1.2077043056488037, |
|
"logits/rejected": -1.2064851522445679, |
|
"logps/chosen": -398.6294250488281, |
|
"logps/rejected": -532.10009765625, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0644519329071045, |
|
"rewards/margins": 1.5777161121368408, |
|
"rewards/rejected": -2.642167806625366, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 9.121636380617016, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": -1.2348374128341675, |
|
"logits/rejected": -1.2065662145614624, |
|
"logps/chosen": -323.52520751953125, |
|
"logps/rejected": -456.983642578125, |
|
"loss": 0.5042, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9337062835693359, |
|
"rewards/margins": 1.160902500152588, |
|
"rewards/rejected": -2.094609022140503, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 10.124524555819926, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": -1.2232173681259155, |
|
"logits/rejected": -1.2059452533721924, |
|
"logps/chosen": -376.4814147949219, |
|
"logps/rejected": -523.2281494140625, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.0080255270004272, |
|
"rewards/margins": 1.6320230960845947, |
|
"rewards/rejected": -2.6400485038757324, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 15.383829882801075, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -1.1030246019363403, |
|
"logits/rejected": -1.165531873703003, |
|
"logps/chosen": -376.2377014160156, |
|
"logps/rejected": -447.36871337890625, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1712180376052856, |
|
"rewards/margins": 0.8365498781204224, |
|
"rewards/rejected": -2.007767915725708, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 8.924221297687437, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": -1.13001549243927, |
|
"logits/rejected": -1.117497205734253, |
|
"logps/chosen": -358.25286865234375, |
|
"logps/rejected": -490.5638732910156, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0755687952041626, |
|
"rewards/margins": 1.3798635005950928, |
|
"rewards/rejected": -2.4554319381713867, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 9.091481860281236, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": -1.1416652202606201, |
|
"logits/rejected": -1.2265089750289917, |
|
"logps/chosen": -366.56915283203125, |
|
"logps/rejected": -414.20721435546875, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.119057059288025, |
|
"rewards/margins": 0.3352181911468506, |
|
"rewards/rejected": -1.454275131225586, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 17.10074312802646, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -1.1152770519256592, |
|
"logits/rejected": -1.1071698665618896, |
|
"logps/chosen": -342.0806884765625, |
|
"logps/rejected": -534.8381958007812, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.19199538230896, |
|
"rewards/margins": 1.8661339282989502, |
|
"rewards/rejected": -3.0581297874450684, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 9.85354436821844, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": -1.1570137739181519, |
|
"logits/rejected": -1.1956650018692017, |
|
"logps/chosen": -509.591552734375, |
|
"logps/rejected": -693.6530151367188, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.156250476837158, |
|
"rewards/margins": 1.8081929683685303, |
|
"rewards/rejected": -3.9644439220428467, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 10.93257575281564, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": -1.0778967142105103, |
|
"logits/rejected": -1.133569598197937, |
|
"logps/chosen": -377.20037841796875, |
|
"logps/rejected": -475.40997314453125, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1448915004730225, |
|
"rewards/margins": 0.9261860847473145, |
|
"rewards/rejected": -2.071077823638916, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 13.493444996052222, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": -1.0905169248580933, |
|
"logits/rejected": -1.1160600185394287, |
|
"logps/chosen": -434.937744140625, |
|
"logps/rejected": -650.6595458984375, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5431007146835327, |
|
"rewards/margins": 2.3286290168762207, |
|
"rewards/rejected": -3.871730327606201, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 17.479671128658037, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": -1.1507641077041626, |
|
"logits/rejected": -1.2998677492141724, |
|
"logps/chosen": -448.26251220703125, |
|
"logps/rejected": -497.29779052734375, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5616766214370728, |
|
"rewards/margins": 1.1937475204467773, |
|
"rewards/rejected": -2.7554240226745605, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 17.248167606427355, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": -1.0716644525527954, |
|
"logits/rejected": -1.1447921991348267, |
|
"logps/chosen": -426.6358947753906, |
|
"logps/rejected": -470.9082946777344, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1936933994293213, |
|
"rewards/margins": 1.0052495002746582, |
|
"rewards/rejected": -2.1989428997039795, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 14.729008337765277, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": -0.9885573387145996, |
|
"logits/rejected": -1.0534632205963135, |
|
"logps/chosen": -458.31378173828125, |
|
"logps/rejected": -540.1591186523438, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3210374116897583, |
|
"rewards/margins": 1.1498852968215942, |
|
"rewards/rejected": -2.4709229469299316, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 15.150583413082405, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": -1.309410810470581, |
|
"logits/rejected": -1.3342196941375732, |
|
"logps/chosen": -416.07293701171875, |
|
"logps/rejected": -466.21923828125, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2828443050384521, |
|
"rewards/margins": 0.6490219235420227, |
|
"rewards/rejected": -1.9318662881851196, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 11.648714128278801, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": -1.0633580684661865, |
|
"logits/rejected": -1.179321527481079, |
|
"logps/chosen": -393.45648193359375, |
|
"logps/rejected": -493.12603759765625, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2883937358856201, |
|
"rewards/margins": 1.364997148513794, |
|
"rewards/rejected": -2.653390884399414, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 10.483921422479957, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": -1.0601375102996826, |
|
"logits/rejected": -1.1300022602081299, |
|
"logps/chosen": -432.314208984375, |
|
"logps/rejected": -605.5133056640625, |
|
"loss": 0.4784, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7640736103057861, |
|
"rewards/margins": 1.7046064138412476, |
|
"rewards/rejected": -3.4686806201934814, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 17.060662316036694, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": -1.243939757347107, |
|
"logits/rejected": -1.223625898361206, |
|
"logps/chosen": -446.4366760253906, |
|
"logps/rejected": -638.4450073242188, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8022911548614502, |
|
"rewards/margins": 1.4770129919052124, |
|
"rewards/rejected": -3.279303789138794, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 12.903823112622515, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": -1.1194379329681396, |
|
"logits/rejected": -1.0698894262313843, |
|
"logps/chosen": -409.68603515625, |
|
"logps/rejected": -603.4207763671875, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4294629096984863, |
|
"rewards/margins": 1.5245181322097778, |
|
"rewards/rejected": -2.9539809226989746, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 10.384415695069938, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": -1.1999337673187256, |
|
"logits/rejected": -1.311231255531311, |
|
"logps/chosen": -397.4037780761719, |
|
"logps/rejected": -460.95269775390625, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9981710314750671, |
|
"rewards/margins": 1.341308832168579, |
|
"rewards/rejected": -2.339479684829712, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 14.761052654024631, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": -1.1137980222702026, |
|
"logits/rejected": -1.1541672945022583, |
|
"logps/chosen": -391.23504638671875, |
|
"logps/rejected": -516.2595825195312, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3899133205413818, |
|
"rewards/margins": 1.2868678569793701, |
|
"rewards/rejected": -2.676781177520752, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 10.417201545289524, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": -1.0899343490600586, |
|
"logits/rejected": -1.247287631034851, |
|
"logps/chosen": -420.63836669921875, |
|
"logps/rejected": -514.6077270507812, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.5773645639419556, |
|
"rewards/margins": 1.506489872932434, |
|
"rewards/rejected": -3.0838541984558105, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 16.488420937432345, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": -1.1203404664993286, |
|
"logits/rejected": -1.1449543237686157, |
|
"logps/chosen": -407.5829162597656, |
|
"logps/rejected": -497.4512634277344, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3892755508422852, |
|
"rewards/margins": 1.1714465618133545, |
|
"rewards/rejected": -2.5607221126556396, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 9.466776525081485, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": -1.3948705196380615, |
|
"logits/rejected": -1.3905651569366455, |
|
"logps/chosen": -379.10009765625, |
|
"logps/rejected": -558.11328125, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.116693139076233, |
|
"rewards/margins": 1.6451349258422852, |
|
"rewards/rejected": -2.7618279457092285, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 14.2505130371337, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": -1.1959508657455444, |
|
"logits/rejected": -1.2822418212890625, |
|
"logps/chosen": -459.30999755859375, |
|
"logps/rejected": -783.4131469726562, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8917354345321655, |
|
"rewards/margins": 2.781670331954956, |
|
"rewards/rejected": -4.673405647277832, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 16.476931783493725, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": -1.2336928844451904, |
|
"logits/rejected": -1.1787619590759277, |
|
"logps/chosen": -439.15838623046875, |
|
"logps/rejected": -729.7471313476562, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9186598062515259, |
|
"rewards/margins": 2.601553440093994, |
|
"rewards/rejected": -4.5202131271362305, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 12.359629943883897, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": -1.1227662563323975, |
|
"logits/rejected": -1.1912363767623901, |
|
"logps/chosen": -392.65057373046875, |
|
"logps/rejected": -619.9832763671875, |
|
"loss": 0.4556, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2552802562713623, |
|
"rewards/margins": 2.460552215576172, |
|
"rewards/rejected": -3.715832233428955, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 19.000478036791087, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": -1.1630818843841553, |
|
"logits/rejected": -1.2058923244476318, |
|
"logps/chosen": -423.0645446777344, |
|
"logps/rejected": -619.4017333984375, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3737356662750244, |
|
"rewards/margins": 2.270371198654175, |
|
"rewards/rejected": -3.6441073417663574, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 11.988638837694287, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": -1.1366350650787354, |
|
"logits/rejected": -1.2177644968032837, |
|
"logps/chosen": -313.95233154296875, |
|
"logps/rejected": -553.4268798828125, |
|
"loss": 0.5165, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2247995138168335, |
|
"rewards/margins": 2.2755985260009766, |
|
"rewards/rejected": -3.5003979206085205, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 13.389751916846311, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": -1.1376798152923584, |
|
"logits/rejected": -1.2225024700164795, |
|
"logps/chosen": -418.84930419921875, |
|
"logps/rejected": -644.3150024414062, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1243631839752197, |
|
"rewards/margins": 1.9992843866348267, |
|
"rewards/rejected": -4.123647212982178, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 12.066478642007997, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": -1.393936038017273, |
|
"logits/rejected": -1.4359791278839111, |
|
"logps/chosen": -366.07171630859375, |
|
"logps/rejected": -646.8983154296875, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4785449504852295, |
|
"rewards/margins": 2.75530743598938, |
|
"rewards/rejected": -4.233852386474609, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 13.594931228595337, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": -1.3401994705200195, |
|
"logits/rejected": -1.4189189672470093, |
|
"logps/chosen": -555.1502685546875, |
|
"logps/rejected": -733.55908203125, |
|
"loss": 0.4941, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.782365560531616, |
|
"rewards/margins": 2.0489964485168457, |
|
"rewards/rejected": -4.831361770629883, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 10.959828441128586, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": -1.3572887182235718, |
|
"logits/rejected": -1.316543459892273, |
|
"logps/chosen": -379.75048828125, |
|
"logps/rejected": -709.0888061523438, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.861644983291626, |
|
"rewards/margins": 2.909771680831909, |
|
"rewards/rejected": -4.771416664123535, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 11.9806643935228, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": -1.2535573244094849, |
|
"logits/rejected": -1.2999234199523926, |
|
"logps/chosen": -466.22979736328125, |
|
"logps/rejected": -587.1083984375, |
|
"loss": 0.4706, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.177558422088623, |
|
"rewards/margins": 1.3151285648345947, |
|
"rewards/rejected": -3.4926867485046387, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 15.64676518237564, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": -1.1491421461105347, |
|
"logits/rejected": -1.2824984788894653, |
|
"logps/chosen": -410.51129150390625, |
|
"logps/rejected": -574.32861328125, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5698862075805664, |
|
"rewards/margins": 2.0116872787475586, |
|
"rewards/rejected": -3.581573486328125, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 14.866463710844934, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": -1.1980191469192505, |
|
"logits/rejected": -1.2435463666915894, |
|
"logps/chosen": -425.57733154296875, |
|
"logps/rejected": -676.6328735351562, |
|
"loss": 0.456, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.72724187374115, |
|
"rewards/margins": 2.7374391555786133, |
|
"rewards/rejected": -4.4646806716918945, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 13.193324388633975, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": -1.4249297380447388, |
|
"logits/rejected": -1.497604489326477, |
|
"logps/chosen": -480.2257385253906, |
|
"logps/rejected": -622.1727294921875, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2823410034179688, |
|
"rewards/margins": 1.655609130859375, |
|
"rewards/rejected": -3.9379496574401855, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 20.768443614327058, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": -1.2781771421432495, |
|
"logits/rejected": -1.3352419137954712, |
|
"logps/chosen": -420.2730407714844, |
|
"logps/rejected": -640.3319091796875, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5896456241607666, |
|
"rewards/margins": 2.4936366081237793, |
|
"rewards/rejected": -4.083281517028809, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 9.971205156379035, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": -1.236127257347107, |
|
"logits/rejected": -1.2078077793121338, |
|
"logps/chosen": -356.2068786621094, |
|
"logps/rejected": -533.8853759765625, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1397576332092285, |
|
"rewards/margins": 1.5811337232589722, |
|
"rewards/rejected": -2.7208914756774902, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 12.163666083646278, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": -1.1941020488739014, |
|
"logits/rejected": -1.2168632745742798, |
|
"logps/chosen": -363.7603454589844, |
|
"logps/rejected": -455.57867431640625, |
|
"loss": 0.4612, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3447378873825073, |
|
"rewards/margins": 1.0612128973007202, |
|
"rewards/rejected": -2.4059505462646484, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 9.273351878722064, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": -1.3338253498077393, |
|
"logits/rejected": -1.336753010749817, |
|
"logps/chosen": -395.6485595703125, |
|
"logps/rejected": -632.2241821289062, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4914841651916504, |
|
"rewards/margins": 2.230437994003296, |
|
"rewards/rejected": -3.7219223976135254, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 25.05777651266455, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": -1.1749566793441772, |
|
"logits/rejected": -1.271209478378296, |
|
"logps/chosen": -429.14306640625, |
|
"logps/rejected": -604.9749755859375, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3869397640228271, |
|
"rewards/margins": 2.094210386276245, |
|
"rewards/rejected": -3.4811503887176514, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 17.369219336254258, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": -1.027753233909607, |
|
"logits/rejected": -1.1785060167312622, |
|
"logps/chosen": -425.7173767089844, |
|
"logps/rejected": -544.3839721679688, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5711647272109985, |
|
"rewards/margins": 1.3786556720733643, |
|
"rewards/rejected": -2.9498205184936523, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 11.861504310327616, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": -1.1897116899490356, |
|
"logits/rejected": -1.299993872642517, |
|
"logps/chosen": -375.5953674316406, |
|
"logps/rejected": -659.8858032226562, |
|
"loss": 0.4618, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3118809461593628, |
|
"rewards/margins": 2.890842914581299, |
|
"rewards/rejected": -4.202723979949951, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 10.8218616423801, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": -1.3571842908859253, |
|
"logits/rejected": -1.385545253753662, |
|
"logps/chosen": -494.83709716796875, |
|
"logps/rejected": -658.3067016601562, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.376838445663452, |
|
"rewards/margins": 1.8850828409194946, |
|
"rewards/rejected": -4.261920928955078, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 10.14042165637657, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": -1.2036101818084717, |
|
"logits/rejected": -1.3167588710784912, |
|
"logps/chosen": -486.25811767578125, |
|
"logps/rejected": -651.5155639648438, |
|
"loss": 0.4531, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9182531833648682, |
|
"rewards/margins": 2.2668986320495605, |
|
"rewards/rejected": -4.18515157699585, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 11.299969604518076, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": -1.376947045326233, |
|
"logits/rejected": -1.4665632247924805, |
|
"logps/chosen": -451.6893615722656, |
|
"logps/rejected": -499.0320739746094, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.929490089416504, |
|
"rewards/margins": 1.0548813343048096, |
|
"rewards/rejected": -2.9843716621398926, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 11.980383531649544, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": -1.332617998123169, |
|
"logits/rejected": -1.3639962673187256, |
|
"logps/chosen": -426.499755859375, |
|
"logps/rejected": -626.6729736328125, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.678342580795288, |
|
"rewards/margins": 2.378535747528076, |
|
"rewards/rejected": -4.056878566741943, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 14.013248425816212, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": -1.2517986297607422, |
|
"logits/rejected": -1.3113311529159546, |
|
"logps/chosen": -414.6436462402344, |
|
"logps/rejected": -657.0100708007812, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3406541347503662, |
|
"rewards/margins": 2.270404815673828, |
|
"rewards/rejected": -3.6110591888427734, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 15.333659280580797, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": -1.4721230268478394, |
|
"logits/rejected": -1.506519079208374, |
|
"logps/chosen": -451.8147888183594, |
|
"logps/rejected": -667.3796997070312, |
|
"loss": 0.448, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6051162481307983, |
|
"rewards/margins": 2.3636975288391113, |
|
"rewards/rejected": -3.96881365776062, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 12.587601683578118, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": -1.5413029193878174, |
|
"logits/rejected": -1.5675899982452393, |
|
"logps/chosen": -508.3623046875, |
|
"logps/rejected": -787.8271484375, |
|
"loss": 0.4284, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.329979658126831, |
|
"rewards/margins": 2.8664588928222656, |
|
"rewards/rejected": -5.196438789367676, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 11.206457061422094, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": -1.1610701084136963, |
|
"logits/rejected": -1.1690763235092163, |
|
"logps/chosen": -401.5397033691406, |
|
"logps/rejected": -605.7808227539062, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7380218505859375, |
|
"rewards/margins": 2.2142810821533203, |
|
"rewards/rejected": -3.952302932739258, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 20.124180714207323, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": -1.1436104774475098, |
|
"logits/rejected": -1.193645715713501, |
|
"logps/chosen": -501.67706298828125, |
|
"logps/rejected": -850.97314453125, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.594907283782959, |
|
"rewards/margins": 3.6968417167663574, |
|
"rewards/rejected": -6.291749000549316, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 8.824763672957205, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": -1.2460205554962158, |
|
"logits/rejected": -1.1843246221542358, |
|
"logps/chosen": -402.1271667480469, |
|
"logps/rejected": -616.098388671875, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.439296007156372, |
|
"rewards/margins": 2.2133936882019043, |
|
"rewards/rejected": -3.6526896953582764, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 14.598877417461923, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": -1.2217421531677246, |
|
"logits/rejected": -1.130197286605835, |
|
"logps/chosen": -352.7733459472656, |
|
"logps/rejected": -632.5406494140625, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2021641731262207, |
|
"rewards/margins": 2.610610246658325, |
|
"rewards/rejected": -3.8127739429473877, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 11.865002342507843, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": -1.342158555984497, |
|
"logits/rejected": -1.390978217124939, |
|
"logps/chosen": -457.16705322265625, |
|
"logps/rejected": -833.06787109375, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.8238433599472046, |
|
"rewards/margins": 3.916942596435547, |
|
"rewards/rejected": -5.740786075592041, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 12.72331809654516, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": -1.4785504341125488, |
|
"logits/rejected": -1.4822914600372314, |
|
"logps/chosen": -445.6946716308594, |
|
"logps/rejected": -639.58837890625, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7562789916992188, |
|
"rewards/margins": 2.1691954135894775, |
|
"rewards/rejected": -3.925474166870117, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 11.703724239093889, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": -1.1503616571426392, |
|
"logits/rejected": -1.1993227005004883, |
|
"logps/chosen": -395.007080078125, |
|
"logps/rejected": -638.5355224609375, |
|
"loss": 0.4344, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1847431659698486, |
|
"rewards/margins": 2.6262497901916504, |
|
"rewards/rejected": -3.810993194580078, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 19.639469762609966, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": -1.2111709117889404, |
|
"logits/rejected": -1.1946176290512085, |
|
"logps/chosen": -405.03558349609375, |
|
"logps/rejected": -638.8396606445312, |
|
"loss": 0.4405, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7199838161468506, |
|
"rewards/margins": 2.166039228439331, |
|
"rewards/rejected": -3.886023759841919, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 12.824816837365054, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": -1.4239578247070312, |
|
"logits/rejected": -1.4245679378509521, |
|
"logps/chosen": -430.7586364746094, |
|
"logps/rejected": -618.7017822265625, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7669651508331299, |
|
"rewards/margins": 1.9917770624160767, |
|
"rewards/rejected": -3.758742094039917, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 17.48149790816653, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -1.307308554649353, |
|
"logits/rejected": -1.2468369007110596, |
|
"logps/chosen": -443.11370849609375, |
|
"logps/rejected": -816.6782836914062, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0893607139587402, |
|
"rewards/margins": 3.403632640838623, |
|
"rewards/rejected": -5.492993354797363, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 13.277912286181408, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": -1.192663550376892, |
|
"logits/rejected": -1.184259295463562, |
|
"logps/chosen": -402.0312194824219, |
|
"logps/rejected": -540.152587890625, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9467941522598267, |
|
"rewards/margins": 0.9955675005912781, |
|
"rewards/rejected": -2.942361354827881, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 12.024084039884944, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": -1.3382481336593628, |
|
"logits/rejected": -1.3211191892623901, |
|
"logps/chosen": -403.6156005859375, |
|
"logps/rejected": -820.3019409179688, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7442741394042969, |
|
"rewards/margins": 3.908841609954834, |
|
"rewards/rejected": -5.653115272521973, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 10.938380648082374, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": -1.0116260051727295, |
|
"logits/rejected": -1.1302238702774048, |
|
"logps/chosen": -524.224853515625, |
|
"logps/rejected": -730.9097900390625, |
|
"loss": 0.4594, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8979120254516602, |
|
"rewards/margins": 2.4903345108032227, |
|
"rewards/rejected": -4.388247489929199, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 17.953634346330745, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": -1.1399272680282593, |
|
"logits/rejected": -1.185240387916565, |
|
"logps/chosen": -480.47869873046875, |
|
"logps/rejected": -711.10107421875, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0881600379943848, |
|
"rewards/margins": 2.2733359336853027, |
|
"rewards/rejected": -4.3614959716796875, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 15.479182257867892, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": -1.2174913883209229, |
|
"logits/rejected": -1.3078429698944092, |
|
"logps/chosen": -448.73626708984375, |
|
"logps/rejected": -778.0083618164062, |
|
"loss": 0.4081, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7068407535552979, |
|
"rewards/margins": 3.3156890869140625, |
|
"rewards/rejected": -5.022529602050781, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 12.258518175047803, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": -1.3552016019821167, |
|
"logits/rejected": -1.4195324182510376, |
|
"logps/chosen": -483.03607177734375, |
|
"logps/rejected": -829.8291015625, |
|
"loss": 0.4493, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.638827323913574, |
|
"rewards/margins": 3.691737651824951, |
|
"rewards/rejected": -6.330564498901367, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 12.234674147688299, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": -1.267978310585022, |
|
"logits/rejected": -1.2512781620025635, |
|
"logps/chosen": -463.58636474609375, |
|
"logps/rejected": -589.7911376953125, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8561378717422485, |
|
"rewards/margins": 1.1768932342529297, |
|
"rewards/rejected": -3.0330309867858887, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 14.004595080556923, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": -1.1959376335144043, |
|
"logits/rejected": -1.1977354288101196, |
|
"logps/chosen": -557.3121337890625, |
|
"logps/rejected": -894.349609375, |
|
"loss": 0.4741, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.671919345855713, |
|
"rewards/margins": 3.7750651836395264, |
|
"rewards/rejected": -6.446984767913818, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 12.107627085595226, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": -1.2063888311386108, |
|
"logits/rejected": -1.294390082359314, |
|
"logps/chosen": -402.6402282714844, |
|
"logps/rejected": -547.6096801757812, |
|
"loss": 0.4266, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.433250904083252, |
|
"rewards/margins": 1.5607125759124756, |
|
"rewards/rejected": -2.9939634799957275, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 17.020506697194886, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": -1.2349357604980469, |
|
"logits/rejected": -1.3519177436828613, |
|
"logps/chosen": -479.85992431640625, |
|
"logps/rejected": -652.7718505859375, |
|
"loss": 0.4142, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8144111633300781, |
|
"rewards/margins": 2.1435914039611816, |
|
"rewards/rejected": -3.958002805709839, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 17.340247743555018, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": -1.342357873916626, |
|
"logits/rejected": -1.2987167835235596, |
|
"logps/chosen": -361.8048400878906, |
|
"logps/rejected": -495.60577392578125, |
|
"loss": 0.4381, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3809950351715088, |
|
"rewards/margins": 0.9643322825431824, |
|
"rewards/rejected": -2.345327138900757, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 12.57132648844664, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": -1.2646602392196655, |
|
"logits/rejected": -1.227052927017212, |
|
"logps/chosen": -390.56402587890625, |
|
"logps/rejected": -566.3883666992188, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.529387354850769, |
|
"rewards/margins": 1.646651029586792, |
|
"rewards/rejected": -3.176038980484009, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 9.769186078821475, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": -1.0911178588867188, |
|
"logits/rejected": -1.1452914476394653, |
|
"logps/chosen": -446.0682678222656, |
|
"logps/rejected": -891.4172973632812, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7286949157714844, |
|
"rewards/margins": 4.412930011749268, |
|
"rewards/rejected": -6.141624927520752, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 15.13387973536505, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": -1.2228879928588867, |
|
"logits/rejected": -1.2863072156906128, |
|
"logps/chosen": -414.58251953125, |
|
"logps/rejected": -585.94189453125, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5823562145233154, |
|
"rewards/margins": 1.9467941522598267, |
|
"rewards/rejected": -3.529151201248169, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 15.692773841294583, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": -1.1607332229614258, |
|
"logits/rejected": -1.1135156154632568, |
|
"logps/chosen": -394.6246032714844, |
|
"logps/rejected": -685.8594970703125, |
|
"loss": 0.4893, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5657716989517212, |
|
"rewards/margins": 2.4296836853027344, |
|
"rewards/rejected": -3.995455503463745, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 12.22013342174461, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": -1.0785077810287476, |
|
"logits/rejected": -1.145101547241211, |
|
"logps/chosen": -388.1426696777344, |
|
"logps/rejected": -562.6093139648438, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6640561819076538, |
|
"rewards/margins": 1.593210220336914, |
|
"rewards/rejected": -3.2572665214538574, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 12.125863891345588, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": -1.2912501096725464, |
|
"logits/rejected": -1.281894326210022, |
|
"logps/chosen": -448.44403076171875, |
|
"logps/rejected": -577.4244384765625, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6368385553359985, |
|
"rewards/margins": 1.7040477991104126, |
|
"rewards/rejected": -3.3408865928649902, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 14.772144560930354, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": -1.2755136489868164, |
|
"logits/rejected": -1.3778313398361206, |
|
"logps/chosen": -365.7793273925781, |
|
"logps/rejected": -445.6787109375, |
|
"loss": 0.4148, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2803051471710205, |
|
"rewards/margins": 1.1510274410247803, |
|
"rewards/rejected": -2.43133282661438, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 19.441822036099662, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": -1.168717622756958, |
|
"logits/rejected": -1.1627219915390015, |
|
"logps/chosen": -418.5533752441406, |
|
"logps/rejected": -577.6104736328125, |
|
"loss": 0.4001, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3612974882125854, |
|
"rewards/margins": 1.8237950801849365, |
|
"rewards/rejected": -3.1850924491882324, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 15.432355872695538, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": -1.1575825214385986, |
|
"logits/rejected": -1.2952228784561157, |
|
"logps/chosen": -471.82476806640625, |
|
"logps/rejected": -670.2379150390625, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2020068168640137, |
|
"rewards/margins": 2.1685726642608643, |
|
"rewards/rejected": -4.370579242706299, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 12.344277319747519, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": -1.3181376457214355, |
|
"logits/rejected": -1.3618929386138916, |
|
"logps/chosen": -485.92034912109375, |
|
"logps/rejected": -651.4783935546875, |
|
"loss": 0.4458, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.687350034713745, |
|
"rewards/margins": 1.5618057250976562, |
|
"rewards/rejected": -4.2491559982299805, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 15.718103189453073, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": -1.1789578199386597, |
|
"logits/rejected": -1.2662181854248047, |
|
"logps/chosen": -411.53680419921875, |
|
"logps/rejected": -600.5362548828125, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7146021127700806, |
|
"rewards/margins": 1.884450912475586, |
|
"rewards/rejected": -3.599053144454956, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 12.906974103488265, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": -1.1591131687164307, |
|
"logits/rejected": -1.1079394817352295, |
|
"logps/chosen": -392.81610107421875, |
|
"logps/rejected": -686.8541259765625, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5098912715911865, |
|
"rewards/margins": 2.9055066108703613, |
|
"rewards/rejected": -4.415398120880127, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 17.744184430696986, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": -1.2879854440689087, |
|
"logits/rejected": -1.2753901481628418, |
|
"logps/chosen": -382.4233703613281, |
|
"logps/rejected": -630.2597045898438, |
|
"loss": 0.4446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5524839162826538, |
|
"rewards/margins": 2.3134608268737793, |
|
"rewards/rejected": -3.8659446239471436, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 15.938147948338413, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": -1.146555781364441, |
|
"logits/rejected": -1.3221074342727661, |
|
"logps/chosen": -438.2669372558594, |
|
"logps/rejected": -592.5673217773438, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6606773138046265, |
|
"rewards/margins": 1.8763577938079834, |
|
"rewards/rejected": -3.5370349884033203, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 15.397723594978256, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": -1.1809333562850952, |
|
"logits/rejected": -1.2159626483917236, |
|
"logps/chosen": -393.14300537109375, |
|
"logps/rejected": -634.4575805664062, |
|
"loss": 0.4445, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5431654453277588, |
|
"rewards/margins": 2.6182663440704346, |
|
"rewards/rejected": -4.161431789398193, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 12.520789981032921, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": -1.2585941553115845, |
|
"logits/rejected": -1.2499480247497559, |
|
"logps/chosen": -395.173828125, |
|
"logps/rejected": -598.7399291992188, |
|
"loss": 0.4262, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.478262186050415, |
|
"rewards/margins": 1.6322393417358398, |
|
"rewards/rejected": -3.110501527786255, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 14.28113094156497, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": -1.1053855419158936, |
|
"logits/rejected": -1.1341431140899658, |
|
"logps/chosen": -424.7064514160156, |
|
"logps/rejected": -587.2310791015625, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4273463487625122, |
|
"rewards/margins": 1.3417994976043701, |
|
"rewards/rejected": -2.769145965576172, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 11.265817004608927, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": -1.1037083864212036, |
|
"logits/rejected": -1.0224764347076416, |
|
"logps/chosen": -393.5522766113281, |
|
"logps/rejected": -614.2374877929688, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5364679098129272, |
|
"rewards/margins": 2.105459690093994, |
|
"rewards/rejected": -3.641927719116211, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 11.977913591571605, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": -1.3552181720733643, |
|
"logits/rejected": -1.3371044397354126, |
|
"logps/chosen": -418.01397705078125, |
|
"logps/rejected": -775.3851318359375, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8135935068130493, |
|
"rewards/margins": 3.5035622119903564, |
|
"rewards/rejected": -5.317155361175537, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 11.503365691015834, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": -1.2816386222839355, |
|
"logits/rejected": -1.2878687381744385, |
|
"logps/chosen": -511.79852294921875, |
|
"logps/rejected": -887.9742431640625, |
|
"loss": 0.447, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.595597743988037, |
|
"rewards/margins": 3.664611339569092, |
|
"rewards/rejected": -6.260209083557129, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 16.17377166072833, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": -1.2984836101531982, |
|
"logits/rejected": -1.399364709854126, |
|
"logps/chosen": -505.53204345703125, |
|
"logps/rejected": -702.7299194335938, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0332143306732178, |
|
"rewards/margins": 2.416463851928711, |
|
"rewards/rejected": -4.449678421020508, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 12.49839489285334, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": -1.3506227731704712, |
|
"logits/rejected": -1.409407615661621, |
|
"logps/chosen": -397.6769714355469, |
|
"logps/rejected": -840.98095703125, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.526439905166626, |
|
"rewards/margins": 4.196308135986328, |
|
"rewards/rejected": -5.722747802734375, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 10.138858801029569, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": -1.3410276174545288, |
|
"logits/rejected": -1.414222002029419, |
|
"logps/chosen": -491.10986328125, |
|
"logps/rejected": -747.819091796875, |
|
"loss": 0.4169, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9301306009292603, |
|
"rewards/margins": 2.9560461044311523, |
|
"rewards/rejected": -4.886176109313965, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 14.446911222851618, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": -1.3879473209381104, |
|
"logits/rejected": -1.5012261867523193, |
|
"logps/chosen": -495.0065002441406, |
|
"logps/rejected": -607.099853515625, |
|
"loss": 0.4593, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.185396194458008, |
|
"rewards/margins": 1.5175437927246094, |
|
"rewards/rejected": -3.702939510345459, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 15.239290825165414, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": -1.2657089233398438, |
|
"logits/rejected": -1.4074336290359497, |
|
"logps/chosen": -394.63385009765625, |
|
"logps/rejected": -730.5569458007812, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7406047582626343, |
|
"rewards/margins": 3.7480709552764893, |
|
"rewards/rejected": -5.488675594329834, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 18.539564378032264, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": -1.373583436012268, |
|
"logits/rejected": -1.449741244316101, |
|
"logps/chosen": -525.2714233398438, |
|
"logps/rejected": -705.874755859375, |
|
"loss": 0.4023, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3716514110565186, |
|
"rewards/margins": 2.285885810852051, |
|
"rewards/rejected": -4.657536506652832, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 15.392536337629252, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": -1.4993913173675537, |
|
"logits/rejected": -1.5507056713104248, |
|
"logps/chosen": -438.17315673828125, |
|
"logps/rejected": -669.3710327148438, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0398783683776855, |
|
"rewards/margins": 2.2368016242980957, |
|
"rewards/rejected": -4.276679515838623, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 10.083282846912516, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": -1.2605036497116089, |
|
"logits/rejected": -1.272882103919983, |
|
"logps/chosen": -459.35870361328125, |
|
"logps/rejected": -793.3179931640625, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9037319421768188, |
|
"rewards/margins": 2.950524091720581, |
|
"rewards/rejected": -4.854256629943848, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 17.777066285729536, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": -1.2640819549560547, |
|
"logits/rejected": -1.4133803844451904, |
|
"logps/chosen": -365.7422790527344, |
|
"logps/rejected": -581.9953002929688, |
|
"loss": 0.4291, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3039973974227905, |
|
"rewards/margins": 2.1755900382995605, |
|
"rewards/rejected": -3.4795870780944824, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 21.190089730860404, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": -1.2760121822357178, |
|
"logits/rejected": -1.3037431240081787, |
|
"logps/chosen": -427.647216796875, |
|
"logps/rejected": -589.5700073242188, |
|
"loss": 0.4056, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.554579496383667, |
|
"rewards/margins": 1.9800221920013428, |
|
"rewards/rejected": -3.5346016883850098, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 19.52879089362984, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": -1.2224397659301758, |
|
"logits/rejected": -1.2577435970306396, |
|
"logps/chosen": -464.634033203125, |
|
"logps/rejected": -659.4881591796875, |
|
"loss": 0.4462, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7432029247283936, |
|
"rewards/margins": 2.287348508834839, |
|
"rewards/rejected": -4.030551433563232, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 15.71172140020582, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": -1.254504680633545, |
|
"logits/rejected": -1.188957929611206, |
|
"logps/chosen": -473.3033142089844, |
|
"logps/rejected": -726.52392578125, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8812453746795654, |
|
"rewards/margins": 2.339358329772949, |
|
"rewards/rejected": -4.2206034660339355, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 13.22641361891775, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": -1.2382128238677979, |
|
"logits/rejected": -1.2535500526428223, |
|
"logps/chosen": -443.01007080078125, |
|
"logps/rejected": -681.6619262695312, |
|
"loss": 0.4319, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.062727212905884, |
|
"rewards/margins": 2.200326919555664, |
|
"rewards/rejected": -4.2630534172058105, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 13.21370692192793, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": -1.234937310218811, |
|
"logits/rejected": -1.3355581760406494, |
|
"logps/chosen": -445.6819763183594, |
|
"logps/rejected": -728.594970703125, |
|
"loss": 0.4254, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3737926483154297, |
|
"rewards/margins": 2.7364139556884766, |
|
"rewards/rejected": -5.110206127166748, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 13.78564096806611, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": -1.2770905494689941, |
|
"logits/rejected": -1.240928292274475, |
|
"logps/chosen": -391.79736328125, |
|
"logps/rejected": -723.361083984375, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.881150484085083, |
|
"rewards/margins": 3.0526633262634277, |
|
"rewards/rejected": -4.93381404876709, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 13.853814670591044, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": -1.2004590034484863, |
|
"logits/rejected": -1.2370150089263916, |
|
"logps/chosen": -512.8619995117188, |
|
"logps/rejected": -702.7752685546875, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.5109477043151855, |
|
"rewards/margins": 1.8365228176116943, |
|
"rewards/rejected": -4.347470283508301, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 10.692863270526209, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": -1.173227310180664, |
|
"logits/rejected": -1.3409078121185303, |
|
"logps/chosen": -474.7144470214844, |
|
"logps/rejected": -680.1087036132812, |
|
"loss": 0.4399, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.028782606124878, |
|
"rewards/margins": 2.1353182792663574, |
|
"rewards/rejected": -4.164100646972656, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 14.233116791502368, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": -1.320565104484558, |
|
"logits/rejected": -1.2968575954437256, |
|
"logps/chosen": -573.8089599609375, |
|
"logps/rejected": -786.044189453125, |
|
"loss": 0.4368, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6720101833343506, |
|
"rewards/margins": 2.062215566635132, |
|
"rewards/rejected": -4.734226226806641, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 15.685451696438584, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": -1.255118489265442, |
|
"logits/rejected": -1.320111870765686, |
|
"logps/chosen": -484.41583251953125, |
|
"logps/rejected": -714.25244140625, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.30792236328125, |
|
"rewards/margins": 2.1702935695648193, |
|
"rewards/rejected": -4.47821569442749, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 16.009709587203158, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": -1.112594723701477, |
|
"logits/rejected": -1.2054228782653809, |
|
"logps/chosen": -423.2367248535156, |
|
"logps/rejected": -543.6708984375, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6221225261688232, |
|
"rewards/margins": 1.2181508541107178, |
|
"rewards/rejected": -2.840273380279541, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 11.690186448497471, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": -1.2797791957855225, |
|
"logits/rejected": -1.3246345520019531, |
|
"logps/chosen": -414.15234375, |
|
"logps/rejected": -695.158203125, |
|
"loss": 0.4033, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4832648038864136, |
|
"rewards/margins": 2.9206230640411377, |
|
"rewards/rejected": -4.40388822555542, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 20.104770187290267, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": -1.030788779258728, |
|
"logits/rejected": -1.1015684604644775, |
|
"logps/chosen": -513.7273559570312, |
|
"logps/rejected": -811.976318359375, |
|
"loss": 0.3855, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.102285623550415, |
|
"rewards/margins": 2.593528985977173, |
|
"rewards/rejected": -4.695814609527588, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 22.35702689198335, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": -1.1791191101074219, |
|
"logits/rejected": -1.2959524393081665, |
|
"logps/chosen": -441.61846923828125, |
|
"logps/rejected": -704.8765869140625, |
|
"loss": 0.409, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2501838207244873, |
|
"rewards/margins": 2.3681230545043945, |
|
"rewards/rejected": -4.618307113647461, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 16.66080453274856, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": -1.2385426759719849, |
|
"logits/rejected": -1.3393757343292236, |
|
"logps/chosen": -572.3016357421875, |
|
"logps/rejected": -789.0521850585938, |
|
"loss": 0.4564, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.771270275115967, |
|
"rewards/margins": 2.124401807785034, |
|
"rewards/rejected": -4.895671844482422, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 13.605651174190195, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": -1.2395048141479492, |
|
"logits/rejected": -1.3828445672988892, |
|
"logps/chosen": -438.51312255859375, |
|
"logps/rejected": -621.2369384765625, |
|
"loss": 0.4486, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7979936599731445, |
|
"rewards/margins": 2.1893625259399414, |
|
"rewards/rejected": -3.987356185913086, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 13.924076058423626, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": -1.2428590059280396, |
|
"logits/rejected": -1.236566185951233, |
|
"logps/chosen": -467.281005859375, |
|
"logps/rejected": -621.5033569335938, |
|
"loss": 0.4229, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.916424036026001, |
|
"rewards/margins": 1.8854621648788452, |
|
"rewards/rejected": -3.8018863201141357, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 10.659549298550333, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": -1.2107280492782593, |
|
"logits/rejected": -1.2906858921051025, |
|
"logps/chosen": -437.7901916503906, |
|
"logps/rejected": -730.5772705078125, |
|
"loss": 0.4458, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5543664693832397, |
|
"rewards/margins": 3.3551418781280518, |
|
"rewards/rejected": -4.90950870513916, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 11.89640175081092, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": -1.0864282846450806, |
|
"logits/rejected": -1.0442813634872437, |
|
"logps/chosen": -426.6461486816406, |
|
"logps/rejected": -630.3248291015625, |
|
"loss": 0.4587, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0740158557891846, |
|
"rewards/margins": 1.9162708520889282, |
|
"rewards/rejected": -3.9902865886688232, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 11.776367189768084, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": -1.307191014289856, |
|
"logits/rejected": -1.312293291091919, |
|
"logps/chosen": -370.92791748046875, |
|
"logps/rejected": -650.8178100585938, |
|
"loss": 0.3818, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.276861310005188, |
|
"rewards/margins": 2.857835531234741, |
|
"rewards/rejected": -4.1346964836120605, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 25.099913631834486, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": -1.1278274059295654, |
|
"logits/rejected": -1.1356306076049805, |
|
"logps/chosen": -495.9942321777344, |
|
"logps/rejected": -648.2416381835938, |
|
"loss": 0.4424, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.38657808303833, |
|
"rewards/margins": 1.207850694656372, |
|
"rewards/rejected": -3.594428539276123, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 12.705340632883466, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": -1.3075058460235596, |
|
"logits/rejected": -1.2933059930801392, |
|
"logps/chosen": -434.21820068359375, |
|
"logps/rejected": -625.4400024414062, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8787765502929688, |
|
"rewards/margins": 1.7934147119522095, |
|
"rewards/rejected": -3.6721911430358887, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 13.755685476664564, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": -1.215529203414917, |
|
"logits/rejected": -1.2133440971374512, |
|
"logps/chosen": -403.08465576171875, |
|
"logps/rejected": -577.1477661132812, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.499050259590149, |
|
"rewards/margins": 1.7851158380508423, |
|
"rewards/rejected": -3.284165859222412, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 13.292089170044676, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": -1.1674937009811401, |
|
"logits/rejected": -1.3074105978012085, |
|
"logps/chosen": -525.9224243164062, |
|
"logps/rejected": -763.0833740234375, |
|
"loss": 0.423, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.349907398223877, |
|
"rewards/margins": 2.648500919342041, |
|
"rewards/rejected": -4.998408317565918, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 18.455895560701023, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": -1.1788341999053955, |
|
"logits/rejected": -1.2539576292037964, |
|
"logps/chosen": -472.1956481933594, |
|
"logps/rejected": -670.5008544921875, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9172461032867432, |
|
"rewards/margins": 2.0367343425750732, |
|
"rewards/rejected": -3.9539802074432373, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 13.471771310006998, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": -1.1498690843582153, |
|
"logits/rejected": -1.1755589246749878, |
|
"logps/chosen": -407.24163818359375, |
|
"logps/rejected": -575.2977905273438, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5899887084960938, |
|
"rewards/margins": 1.5084218978881836, |
|
"rewards/rejected": -3.0984106063842773, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 15.761320703617217, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": -1.139108419418335, |
|
"logits/rejected": -1.1348917484283447, |
|
"logps/chosen": -422.84893798828125, |
|
"logps/rejected": -555.7877807617188, |
|
"loss": 0.4274, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4848568439483643, |
|
"rewards/margins": 1.3276598453521729, |
|
"rewards/rejected": -2.812516689300537, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 17.997023645014245, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": -1.2534973621368408, |
|
"logits/rejected": -1.2950940132141113, |
|
"logps/chosen": -413.71490478515625, |
|
"logps/rejected": -538.5707397460938, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.67266047000885, |
|
"rewards/margins": 1.2678474187850952, |
|
"rewards/rejected": -2.9405078887939453, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 12.638370779776375, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": -1.288747787475586, |
|
"logits/rejected": -1.3166449069976807, |
|
"logps/chosen": -454.18359375, |
|
"logps/rejected": -724.0892333984375, |
|
"loss": 0.4079, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0871493816375732, |
|
"rewards/margins": 2.4171881675720215, |
|
"rewards/rejected": -4.504337310791016, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 19.25266010844361, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": -1.332960844039917, |
|
"logits/rejected": -1.3756061792373657, |
|
"logps/chosen": -447.2140197753906, |
|
"logps/rejected": -657.7930297851562, |
|
"loss": 0.4256, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.023087978363037, |
|
"rewards/margins": 2.0285849571228027, |
|
"rewards/rejected": -4.05167293548584, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 17.434973094318284, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": -1.320966124534607, |
|
"logits/rejected": -1.4106026887893677, |
|
"logps/chosen": -476.49267578125, |
|
"logps/rejected": -632.2120361328125, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1607255935668945, |
|
"rewards/margins": 1.7891597747802734, |
|
"rewards/rejected": -3.949885129928589, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 17.474868196021465, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": -1.2660505771636963, |
|
"logits/rejected": -1.3750221729278564, |
|
"logps/chosen": -442.905029296875, |
|
"logps/rejected": -755.1177978515625, |
|
"loss": 0.4058, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1417970657348633, |
|
"rewards/margins": 3.1892342567443848, |
|
"rewards/rejected": -5.331031322479248, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 14.837534970558897, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": -1.1648900508880615, |
|
"logits/rejected": -1.1385093927383423, |
|
"logps/chosen": -472.541015625, |
|
"logps/rejected": -685.985107421875, |
|
"loss": 0.4347, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.109835147857666, |
|
"rewards/margins": 2.0320799350738525, |
|
"rewards/rejected": -4.1419148445129395, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 12.732468692827648, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": -1.1894464492797852, |
|
"logits/rejected": -1.2552679777145386, |
|
"logps/chosen": -484.76409912109375, |
|
"logps/rejected": -814.096435546875, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.386613130569458, |
|
"rewards/margins": 3.1459081172943115, |
|
"rewards/rejected": -5.532520771026611, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 15.991158601320864, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": -1.2773798704147339, |
|
"logits/rejected": -1.2588646411895752, |
|
"logps/chosen": -438.006591796875, |
|
"logps/rejected": -779.8575439453125, |
|
"loss": 0.4461, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8076789379119873, |
|
"rewards/margins": 3.383274793624878, |
|
"rewards/rejected": -5.190953254699707, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 15.0413189840848, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": -1.2713805437088013, |
|
"logits/rejected": -1.2931923866271973, |
|
"logps/chosen": -474.3173828125, |
|
"logps/rejected": -771.8619384765625, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2440552711486816, |
|
"rewards/margins": 3.1775963306427, |
|
"rewards/rejected": -5.421651840209961, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 12.016086648025393, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": -1.1932638883590698, |
|
"logits/rejected": -1.2385377883911133, |
|
"logps/chosen": -468.5335998535156, |
|
"logps/rejected": -564.7030029296875, |
|
"loss": 0.4914, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.7891786098480225, |
|
"rewards/margins": 1.3260656595230103, |
|
"rewards/rejected": -3.1152443885803223, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 15.752188949724715, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": -1.2558810710906982, |
|
"logits/rejected": -1.3106260299682617, |
|
"logps/chosen": -388.71038818359375, |
|
"logps/rejected": -497.370361328125, |
|
"loss": 0.4766, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3813974857330322, |
|
"rewards/margins": 0.8968814015388489, |
|
"rewards/rejected": -2.2782788276672363, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 17.371358487239537, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": -1.3759686946868896, |
|
"logits/rejected": -1.3909227848052979, |
|
"logps/chosen": -353.22833251953125, |
|
"logps/rejected": -674.2232666015625, |
|
"loss": 0.4387, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.275880217552185, |
|
"rewards/margins": 2.9993698596954346, |
|
"rewards/rejected": -4.275249481201172, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 15.68257354415993, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": -1.3854516744613647, |
|
"logits/rejected": -1.3968112468719482, |
|
"logps/chosen": -449.6177673339844, |
|
"logps/rejected": -670.1103515625, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7667739391326904, |
|
"rewards/margins": 2.2145369052886963, |
|
"rewards/rejected": -3.981311082839966, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 17.255086501798605, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": -1.211531400680542, |
|
"logits/rejected": -1.283849835395813, |
|
"logps/chosen": -389.5179138183594, |
|
"logps/rejected": -602.1033325195312, |
|
"loss": 0.3883, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6021864414215088, |
|
"rewards/margins": 2.1839447021484375, |
|
"rewards/rejected": -3.786130905151367, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 23.697806480913663, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": -1.4293580055236816, |
|
"logits/rejected": -1.4167674779891968, |
|
"logps/chosen": -482.10028076171875, |
|
"logps/rejected": -786.3612060546875, |
|
"loss": 0.4341, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2943575382232666, |
|
"rewards/margins": 3.070589065551758, |
|
"rewards/rejected": -5.3649468421936035, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 20.5376691734358, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": -1.3646225929260254, |
|
"logits/rejected": -1.3295977115631104, |
|
"logps/chosen": -490.397705078125, |
|
"logps/rejected": -880.2039184570312, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3713932037353516, |
|
"rewards/margins": 3.905015468597412, |
|
"rewards/rejected": -6.2764081954956055, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 33.70464819485353, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": -1.345577597618103, |
|
"logits/rejected": -1.318164587020874, |
|
"logps/chosen": -443.73492431640625, |
|
"logps/rejected": -621.4215698242188, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3964602947235107, |
|
"rewards/margins": 1.5979655981063843, |
|
"rewards/rejected": -3.9944260120391846, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 14.933194013231358, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": -1.3429863452911377, |
|
"logits/rejected": -1.419154405593872, |
|
"logps/chosen": -432.6609802246094, |
|
"logps/rejected": -600.9601440429688, |
|
"loss": 0.423, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0265350341796875, |
|
"rewards/margins": 1.6890513896942139, |
|
"rewards/rejected": -3.7155869007110596, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 16.16343931231014, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": -1.3177053928375244, |
|
"logits/rejected": -1.2920299768447876, |
|
"logps/chosen": -412.7960510253906, |
|
"logps/rejected": -659.1737060546875, |
|
"loss": 0.4355, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.99508535861969, |
|
"rewards/margins": 2.033306360244751, |
|
"rewards/rejected": -4.0283918380737305, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 15.4620970327947, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": -1.1979784965515137, |
|
"logits/rejected": -1.3762614727020264, |
|
"logps/chosen": -500.4812927246094, |
|
"logps/rejected": -641.75537109375, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1022884845733643, |
|
"rewards/margins": 1.735335111618042, |
|
"rewards/rejected": -3.8376235961914062, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 17.931731909002746, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": -1.2692848443984985, |
|
"logits/rejected": -1.2660033702850342, |
|
"logps/chosen": -370.6279296875, |
|
"logps/rejected": -573.5665283203125, |
|
"loss": 0.4162, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5542004108428955, |
|
"rewards/margins": 1.4760851860046387, |
|
"rewards/rejected": -3.0302860736846924, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 17.293463950337944, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": -1.2170095443725586, |
|
"logits/rejected": -1.3737263679504395, |
|
"logps/chosen": -454.30224609375, |
|
"logps/rejected": -541.3650512695312, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.897562026977539, |
|
"rewards/margins": 1.3505643606185913, |
|
"rewards/rejected": -3.248126268386841, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 14.543185750157452, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": -1.2438604831695557, |
|
"logits/rejected": -1.3291311264038086, |
|
"logps/chosen": -413.92144775390625, |
|
"logps/rejected": -483.96240234375, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.790085792541504, |
|
"rewards/margins": 0.866573691368103, |
|
"rewards/rejected": -2.6566593647003174, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 12.815153330284657, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": -1.3083336353302002, |
|
"logits/rejected": -1.3296959400177002, |
|
"logps/chosen": -408.14068603515625, |
|
"logps/rejected": -637.8969116210938, |
|
"loss": 0.4107, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.137500524520874, |
|
"rewards/margins": 2.198113203048706, |
|
"rewards/rejected": -4.33561372756958, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 19.62558947066615, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": -1.3259559869766235, |
|
"logits/rejected": -1.3641198873519897, |
|
"logps/chosen": -511.7887268066406, |
|
"logps/rejected": -747.3419189453125, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.02677845954895, |
|
"rewards/margins": 2.806626796722412, |
|
"rewards/rejected": -4.833405017852783, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 21.00689689996569, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": -1.1748692989349365, |
|
"logits/rejected": -1.220413327217102, |
|
"logps/chosen": -463.1639099121094, |
|
"logps/rejected": -683.6637573242188, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8594157695770264, |
|
"rewards/margins": 2.1025776863098145, |
|
"rewards/rejected": -3.96199369430542, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 16.379604246595516, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": -1.2951033115386963, |
|
"logits/rejected": -1.4975831508636475, |
|
"logps/chosen": -450.65155029296875, |
|
"logps/rejected": -539.8964233398438, |
|
"loss": 0.4165, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0984275341033936, |
|
"rewards/margins": 1.536707878112793, |
|
"rewards/rejected": -3.6351349353790283, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 13.015590564989903, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": -1.2712581157684326, |
|
"logits/rejected": -1.3997230529785156, |
|
"logps/chosen": -406.917236328125, |
|
"logps/rejected": -671.5166625976562, |
|
"loss": 0.4366, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.939073920249939, |
|
"rewards/margins": 2.8481147289276123, |
|
"rewards/rejected": -4.787188529968262, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 13.201413526903568, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": -1.2536894083023071, |
|
"logits/rejected": -1.2781312465667725, |
|
"logps/chosen": -368.1941833496094, |
|
"logps/rejected": -593.6776123046875, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6818746328353882, |
|
"rewards/margins": 2.3582541942596436, |
|
"rewards/rejected": -4.040129661560059, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 12.990669417950757, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": -1.1515527963638306, |
|
"logits/rejected": -1.1078431606292725, |
|
"logps/chosen": -375.81732177734375, |
|
"logps/rejected": -583.7494506835938, |
|
"loss": 0.3986, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.465872049331665, |
|
"rewards/margins": 1.888287901878357, |
|
"rewards/rejected": -3.3541598320007324, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 17.818623986164003, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": -1.242234468460083, |
|
"logits/rejected": -1.2769407033920288, |
|
"logps/chosen": -519.7703857421875, |
|
"logps/rejected": -948.0362548828125, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.072312116622925, |
|
"rewards/margins": 4.26052188873291, |
|
"rewards/rejected": -7.332834720611572, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 14.48204764987916, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": -1.3385140895843506, |
|
"logits/rejected": -1.3878307342529297, |
|
"logps/chosen": -430.9832458496094, |
|
"logps/rejected": -745.4675903320312, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.236307144165039, |
|
"rewards/margins": 2.929969310760498, |
|
"rewards/rejected": -5.166275978088379, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 27.82308149849405, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": -1.271857500076294, |
|
"logits/rejected": -1.3383657932281494, |
|
"logps/chosen": -462.377685546875, |
|
"logps/rejected": -775.3133544921875, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.4519903659820557, |
|
"rewards/margins": 3.0875110626220703, |
|
"rewards/rejected": -5.539501190185547, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 13.320769149199382, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": -1.2566578388214111, |
|
"logits/rejected": -1.29204261302948, |
|
"logps/chosen": -504.62371826171875, |
|
"logps/rejected": -647.8597412109375, |
|
"loss": 0.4486, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.186826705932617, |
|
"rewards/margins": 1.7466872930526733, |
|
"rewards/rejected": -3.9335131645202637, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 15.89239004487952, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": -1.135602355003357, |
|
"logits/rejected": -1.110710859298706, |
|
"logps/chosen": -410.08441162109375, |
|
"logps/rejected": -693.5352783203125, |
|
"loss": 0.4104, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8059812784194946, |
|
"rewards/margins": 2.415012836456299, |
|
"rewards/rejected": -4.220993995666504, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 19.370614206098328, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": -1.1299896240234375, |
|
"logits/rejected": -1.0449109077453613, |
|
"logps/chosen": -411.85040283203125, |
|
"logps/rejected": -708.7200927734375, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.996572494506836, |
|
"rewards/margins": 2.44885516166687, |
|
"rewards/rejected": -4.445427894592285, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 16.656657262376168, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": -1.0786526203155518, |
|
"logits/rejected": -1.0406323671340942, |
|
"logps/chosen": -351.46417236328125, |
|
"logps/rejected": -575.1397705078125, |
|
"loss": 0.3825, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2345200777053833, |
|
"rewards/margins": 2.170752763748169, |
|
"rewards/rejected": -3.4052727222442627, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 19.934123908947278, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": -1.242959976196289, |
|
"logits/rejected": -1.2493782043457031, |
|
"logps/chosen": -472.5721130371094, |
|
"logps/rejected": -716.5457763671875, |
|
"loss": 0.431, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.007760763168335, |
|
"rewards/margins": 2.496676206588745, |
|
"rewards/rejected": -4.504437446594238, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 15.746281719328787, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": -1.1213797330856323, |
|
"logits/rejected": -1.3027660846710205, |
|
"logps/chosen": -431.63641357421875, |
|
"logps/rejected": -593.3057861328125, |
|
"loss": 0.4323, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9971039295196533, |
|
"rewards/margins": 1.9194910526275635, |
|
"rewards/rejected": -3.9165947437286377, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 23.759422926525374, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": -1.0919904708862305, |
|
"logits/rejected": -1.2857704162597656, |
|
"logps/chosen": -511.79132080078125, |
|
"logps/rejected": -708.4951171875, |
|
"loss": 0.3853, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.4641222953796387, |
|
"rewards/margins": 2.413705348968506, |
|
"rewards/rejected": -4.8778276443481445, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 25.281497673378116, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": -1.229898452758789, |
|
"logits/rejected": -1.2374264001846313, |
|
"logps/chosen": -431.0499572753906, |
|
"logps/rejected": -689.706298828125, |
|
"loss": 0.468, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1411490440368652, |
|
"rewards/margins": 2.5051817893981934, |
|
"rewards/rejected": -4.6463303565979, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 14.693269652927464, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": -1.187809944152832, |
|
"logits/rejected": -1.1687113046646118, |
|
"logps/chosen": -414.11407470703125, |
|
"logps/rejected": -739.9503173828125, |
|
"loss": 0.3685, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.609575867652893, |
|
"rewards/margins": 3.377927780151367, |
|
"rewards/rejected": -4.9875030517578125, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 18.59133617851427, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": -1.203471302986145, |
|
"logits/rejected": -1.3611973524093628, |
|
"logps/chosen": -412.1192321777344, |
|
"logps/rejected": -546.4465942382812, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.861853837966919, |
|
"rewards/margins": 1.7769527435302734, |
|
"rewards/rejected": -3.6388065814971924, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 13.920438386133542, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": -1.1817299127578735, |
|
"logits/rejected": -1.3077366352081299, |
|
"logps/chosen": -465.7212829589844, |
|
"logps/rejected": -663.9733276367188, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.955004334449768, |
|
"rewards/margins": 2.0206496715545654, |
|
"rewards/rejected": -3.975654125213623, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 13.161703470683147, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": -1.1682353019714355, |
|
"logits/rejected": -1.2265560626983643, |
|
"logps/chosen": -466.87713623046875, |
|
"logps/rejected": -877.6561279296875, |
|
"loss": 0.4242, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.123385190963745, |
|
"rewards/margins": 4.002751350402832, |
|
"rewards/rejected": -6.126136779785156, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 15.36104260148359, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": -1.2761640548706055, |
|
"logits/rejected": -1.3670276403427124, |
|
"logps/chosen": -424.89581298828125, |
|
"logps/rejected": -750.1470947265625, |
|
"loss": 0.4409, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8831450939178467, |
|
"rewards/margins": 3.1273887157440186, |
|
"rewards/rejected": -5.010534286499023, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 10.821617135187617, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": -1.2255038022994995, |
|
"logits/rejected": -1.249638319015503, |
|
"logps/chosen": -406.5605773925781, |
|
"logps/rejected": -569.8077392578125, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.5970432758331299, |
|
"rewards/margins": 1.7124197483062744, |
|
"rewards/rejected": -3.3094630241394043, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 20.79834881298733, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": -1.2082470655441284, |
|
"logits/rejected": -1.2324997186660767, |
|
"logps/chosen": -405.4540710449219, |
|
"logps/rejected": -772.509765625, |
|
"loss": 0.4118, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.863959550857544, |
|
"rewards/margins": 3.0209383964538574, |
|
"rewards/rejected": -4.8848981857299805, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 14.594348842540208, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": -1.1460466384887695, |
|
"logits/rejected": -1.148008108139038, |
|
"logps/chosen": -481.5874938964844, |
|
"logps/rejected": -762.729736328125, |
|
"loss": 0.3784, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.904550313949585, |
|
"rewards/margins": 2.8737847805023193, |
|
"rewards/rejected": -4.778334617614746, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 18.405220413400123, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": -1.2919515371322632, |
|
"logits/rejected": -1.226064682006836, |
|
"logps/chosen": -384.24078369140625, |
|
"logps/rejected": -750.8485107421875, |
|
"loss": 0.4252, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0034337043762207, |
|
"rewards/margins": 3.35615611076355, |
|
"rewards/rejected": -5.359589576721191, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 20.54137342064033, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": -1.2917633056640625, |
|
"logits/rejected": -1.313946008682251, |
|
"logps/chosen": -463.4065856933594, |
|
"logps/rejected": -708.0303955078125, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.310145854949951, |
|
"rewards/margins": 2.4632420539855957, |
|
"rewards/rejected": -4.773387432098389, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 12.212262130926057, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": -1.3770760297775269, |
|
"logits/rejected": -1.3869761228561401, |
|
"logps/chosen": -496.34136962890625, |
|
"logps/rejected": -1004.2086791992188, |
|
"loss": 0.3796, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.659231662750244, |
|
"rewards/margins": 4.979175090789795, |
|
"rewards/rejected": -7.638407230377197, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 16.20411120653905, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": -1.1182453632354736, |
|
"logits/rejected": -1.1282155513763428, |
|
"logps/chosen": -475.62335205078125, |
|
"logps/rejected": -599.8681030273438, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9853700399398804, |
|
"rewards/margins": 1.6344798803329468, |
|
"rewards/rejected": -3.619849681854248, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 22.936528003077683, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": -1.1725223064422607, |
|
"logits/rejected": -1.3193824291229248, |
|
"logps/chosen": -507.3565368652344, |
|
"logps/rejected": -717.995849609375, |
|
"loss": 0.4443, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1068882942199707, |
|
"rewards/margins": 2.464982509613037, |
|
"rewards/rejected": -4.57187032699585, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 18.6077043898828, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": -1.1054435968399048, |
|
"logits/rejected": -1.0801939964294434, |
|
"logps/chosen": -478.20074462890625, |
|
"logps/rejected": -859.6195068359375, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2413601875305176, |
|
"rewards/margins": 3.528954029083252, |
|
"rewards/rejected": -5.7703142166137695, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 14.701850577255247, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": -1.1424671411514282, |
|
"logits/rejected": -1.3594285249710083, |
|
"logps/chosen": -486.80755615234375, |
|
"logps/rejected": -663.4140625, |
|
"loss": 0.3759, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2926859855651855, |
|
"rewards/margins": 2.290337085723877, |
|
"rewards/rejected": -4.583022594451904, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 23.82001052972649, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": -1.220568299293518, |
|
"logits/rejected": -1.2531940937042236, |
|
"logps/chosen": -426.9730529785156, |
|
"logps/rejected": -829.8860473632812, |
|
"loss": 0.4311, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0204787254333496, |
|
"rewards/margins": 3.661693572998047, |
|
"rewards/rejected": -5.6821722984313965, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 20.92197517678509, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": -1.3699915409088135, |
|
"logits/rejected": -1.415359377861023, |
|
"logps/chosen": -504.123291015625, |
|
"logps/rejected": -585.438232421875, |
|
"loss": 0.4529, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.390109062194824, |
|
"rewards/margins": 1.1103547811508179, |
|
"rewards/rejected": -3.5004639625549316, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 11.90165561763133, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": -1.2418677806854248, |
|
"logits/rejected": -1.2179887294769287, |
|
"logps/chosen": -498.97607421875, |
|
"logps/rejected": -764.77587890625, |
|
"loss": 0.3979, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3297247886657715, |
|
"rewards/margins": 2.4172799587249756, |
|
"rewards/rejected": -4.747004508972168, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 13.166436756002133, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": -1.3559385538101196, |
|
"logits/rejected": -1.3355977535247803, |
|
"logps/chosen": -489.180419921875, |
|
"logps/rejected": -652.1635131835938, |
|
"loss": 0.4155, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2472357749938965, |
|
"rewards/margins": 1.647270917892456, |
|
"rewards/rejected": -3.8945069313049316, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 19.18769450791739, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": -1.3151136636734009, |
|
"logits/rejected": -1.3525760173797607, |
|
"logps/chosen": -442.0126953125, |
|
"logps/rejected": -712.2786865234375, |
|
"loss": 0.3659, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7712196111679077, |
|
"rewards/margins": 2.6433427333831787, |
|
"rewards/rejected": -4.414562702178955, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 16.006816349741847, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": -1.1541482210159302, |
|
"logits/rejected": -1.1810917854309082, |
|
"logps/chosen": -428.27117919921875, |
|
"logps/rejected": -661.9808959960938, |
|
"loss": 0.3728, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8128198385238647, |
|
"rewards/margins": 2.3960225582122803, |
|
"rewards/rejected": -4.2088422775268555, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 15.843743360837514, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": -1.2659448385238647, |
|
"logits/rejected": -1.3333221673965454, |
|
"logps/chosen": -380.4425354003906, |
|
"logps/rejected": -661.7493896484375, |
|
"loss": 0.3829, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8144347667694092, |
|
"rewards/margins": 2.581631898880005, |
|
"rewards/rejected": -4.396066665649414, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 12.902781532875105, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": -1.202580451965332, |
|
"logits/rejected": -1.2428548336029053, |
|
"logps/chosen": -393.21392822265625, |
|
"logps/rejected": -585.6798095703125, |
|
"loss": 0.4472, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5873596668243408, |
|
"rewards/margins": 1.7750890254974365, |
|
"rewards/rejected": -3.3624484539031982, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 15.497438468614629, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": -1.1852418184280396, |
|
"logits/rejected": -1.3214651346206665, |
|
"logps/chosen": -513.821044921875, |
|
"logps/rejected": -740.9768676757812, |
|
"loss": 0.4159, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0691330432891846, |
|
"rewards/margins": 2.799996852874756, |
|
"rewards/rejected": -4.8691301345825195, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 25.023616546810636, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": -1.2369322776794434, |
|
"logits/rejected": -1.2678784132003784, |
|
"logps/chosen": -495.04425048828125, |
|
"logps/rejected": -637.2513427734375, |
|
"loss": 0.4159, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1835405826568604, |
|
"rewards/margins": 1.4997859001159668, |
|
"rewards/rejected": -3.6833267211914062, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 13.945033201288798, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": -1.1865065097808838, |
|
"logits/rejected": -1.3370563983917236, |
|
"logps/chosen": -465.9248046875, |
|
"logps/rejected": -663.2384033203125, |
|
"loss": 0.4064, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0080044269561768, |
|
"rewards/margins": 2.541308879852295, |
|
"rewards/rejected": -4.549313545227051, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 16.335549570588938, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": -1.090932846069336, |
|
"logits/rejected": -1.1426491737365723, |
|
"logps/chosen": -454.6915588378906, |
|
"logps/rejected": -645.7820434570312, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7976535558700562, |
|
"rewards/margins": 2.447206735610962, |
|
"rewards/rejected": -4.244860649108887, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 19.689368835083336, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": -1.0205479860305786, |
|
"logits/rejected": -1.2015823125839233, |
|
"logps/chosen": -444.68115234375, |
|
"logps/rejected": -664.7821044921875, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8803367614746094, |
|
"rewards/margins": 2.4233222007751465, |
|
"rewards/rejected": -4.303658485412598, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 15.922653221989023, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": -1.121628999710083, |
|
"logits/rejected": -1.2732969522476196, |
|
"logps/chosen": -426.40631103515625, |
|
"logps/rejected": -628.9437255859375, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6420685052871704, |
|
"rewards/margins": 2.0467495918273926, |
|
"rewards/rejected": -3.6888179779052734, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 10.688957500109868, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": -1.1144278049468994, |
|
"logits/rejected": -1.2004985809326172, |
|
"logps/chosen": -465.1630859375, |
|
"logps/rejected": -635.3046264648438, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7992807626724243, |
|
"rewards/margins": 1.7720779180526733, |
|
"rewards/rejected": -3.5713589191436768, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 11.408686793770782, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": -1.2038311958312988, |
|
"logits/rejected": -1.162626028060913, |
|
"logps/chosen": -476.5159606933594, |
|
"logps/rejected": -705.89990234375, |
|
"loss": 0.4191, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.387838840484619, |
|
"rewards/margins": 2.2411282062530518, |
|
"rewards/rejected": -4.628966331481934, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 12.118691511541517, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": -1.2565038204193115, |
|
"logits/rejected": -1.167474389076233, |
|
"logps/chosen": -400.8456115722656, |
|
"logps/rejected": -641.7380981445312, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9326709508895874, |
|
"rewards/margins": 1.984291672706604, |
|
"rewards/rejected": -3.9169623851776123, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 13.932235400427288, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": -1.1590187549591064, |
|
"logits/rejected": -1.3027136325836182, |
|
"logps/chosen": -474.519287109375, |
|
"logps/rejected": -681.8683471679688, |
|
"loss": 0.4318, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.970078468322754, |
|
"rewards/margins": 2.387824535369873, |
|
"rewards/rejected": -4.357902526855469, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 16.723737190217008, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": -1.1410120725631714, |
|
"logits/rejected": -1.1267549991607666, |
|
"logps/chosen": -505.823974609375, |
|
"logps/rejected": -715.5396118164062, |
|
"loss": 0.3923, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1921565532684326, |
|
"rewards/margins": 2.031604290008545, |
|
"rewards/rejected": -4.223761081695557, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 21.50806215898013, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": -1.3120447397232056, |
|
"logits/rejected": -1.281110405921936, |
|
"logps/chosen": -368.84185791015625, |
|
"logps/rejected": -601.4373168945312, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7995599508285522, |
|
"rewards/margins": 2.158607006072998, |
|
"rewards/rejected": -3.958167314529419, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 27.638375107613143, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": -1.1347416639328003, |
|
"logits/rejected": -1.3342390060424805, |
|
"logps/chosen": -551.9705810546875, |
|
"logps/rejected": -637.0390625, |
|
"loss": 0.4529, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7998149394989014, |
|
"rewards/margins": 1.0880420207977295, |
|
"rewards/rejected": -3.8878567218780518, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 14.692498350185678, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": -1.2893893718719482, |
|
"logits/rejected": -1.3418468236923218, |
|
"logps/chosen": -442.1363220214844, |
|
"logps/rejected": -626.1585083007812, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9635156393051147, |
|
"rewards/margins": 1.7742655277252197, |
|
"rewards/rejected": -3.737781047821045, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 13.677081786503933, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": -1.0876823663711548, |
|
"logits/rejected": -1.1584521532058716, |
|
"logps/chosen": -408.7326965332031, |
|
"logps/rejected": -630.5333862304688, |
|
"loss": 0.4069, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.062255620956421, |
|
"rewards/margins": 2.1946072578430176, |
|
"rewards/rejected": -4.256862163543701, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 15.872787924761283, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": -1.3518760204315186, |
|
"logits/rejected": -1.3280441761016846, |
|
"logps/chosen": -398.5044860839844, |
|
"logps/rejected": -600.9732666015625, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9100825786590576, |
|
"rewards/margins": 2.0270023345947266, |
|
"rewards/rejected": -3.9370853900909424, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 13.346767559623201, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": -1.1873770952224731, |
|
"logits/rejected": -1.1855942010879517, |
|
"logps/chosen": -413.6412048339844, |
|
"logps/rejected": -724.1486206054688, |
|
"loss": 0.3688, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3553316593170166, |
|
"rewards/margins": 2.758777379989624, |
|
"rewards/rejected": -5.114109516143799, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 22.448762027485316, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": -1.2834995985031128, |
|
"logits/rejected": -1.3559261560440063, |
|
"logps/chosen": -453.78985595703125, |
|
"logps/rejected": -671.7575073242188, |
|
"loss": 0.4267, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.049879550933838, |
|
"rewards/margins": 2.2892394065856934, |
|
"rewards/rejected": -4.339118957519531, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 16.129648043941792, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": -1.1959068775177002, |
|
"logits/rejected": -1.2405879497528076, |
|
"logps/chosen": -505.00726318359375, |
|
"logps/rejected": -860.0382690429688, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3982629776000977, |
|
"rewards/margins": 3.6283717155456543, |
|
"rewards/rejected": -6.026634693145752, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 14.433857430526624, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": -1.1443761587142944, |
|
"logits/rejected": -1.1771003007888794, |
|
"logps/chosen": -516.5400390625, |
|
"logps/rejected": -709.7727661132812, |
|
"loss": 0.4379, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.209808826446533, |
|
"rewards/margins": 1.9394118785858154, |
|
"rewards/rejected": -4.1492204666137695, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 14.063698311506704, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": -1.1812469959259033, |
|
"logits/rejected": -1.1932779550552368, |
|
"logps/chosen": -486.75848388671875, |
|
"logps/rejected": -688.697509765625, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0185728073120117, |
|
"rewards/margins": 1.8188155889511108, |
|
"rewards/rejected": -3.837387800216675, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 12.25394441206106, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": -1.1238670349121094, |
|
"logits/rejected": -1.2288951873779297, |
|
"logps/chosen": -479.03082275390625, |
|
"logps/rejected": -721.0484008789062, |
|
"loss": 0.4231, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9672508239746094, |
|
"rewards/margins": 2.5986568927764893, |
|
"rewards/rejected": -4.5659074783325195, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 14.233686065176078, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": -1.1306841373443604, |
|
"logits/rejected": -1.2058961391448975, |
|
"logps/chosen": -530.5966796875, |
|
"logps/rejected": -791.8387451171875, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.097524881362915, |
|
"rewards/margins": 2.7319130897521973, |
|
"rewards/rejected": -4.829438209533691, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 19.693131800311516, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": -1.255438208580017, |
|
"logits/rejected": -1.3434498310089111, |
|
"logps/chosen": -474.38916015625, |
|
"logps/rejected": -754.1934814453125, |
|
"loss": 0.4109, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2069497108459473, |
|
"rewards/margins": 2.954502582550049, |
|
"rewards/rejected": -5.161452293395996, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 16.97782338199475, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": -1.1937129497528076, |
|
"logits/rejected": -1.2495759725570679, |
|
"logps/chosen": -488.2049865722656, |
|
"logps/rejected": -799.8907470703125, |
|
"loss": 0.4527, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2300727367401123, |
|
"rewards/margins": 2.9182958602905273, |
|
"rewards/rejected": -5.148368835449219, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 18.393491636386905, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": -1.1326544284820557, |
|
"logits/rejected": -1.2836530208587646, |
|
"logps/chosen": -472.63739013671875, |
|
"logps/rejected": -724.0669555664062, |
|
"loss": 0.3849, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8869655132293701, |
|
"rewards/margins": 2.8039302825927734, |
|
"rewards/rejected": -4.6908955574035645, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 20.305303573951132, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": -1.117078423500061, |
|
"logits/rejected": -1.2159783840179443, |
|
"logps/chosen": -495.0431213378906, |
|
"logps/rejected": -729.7164306640625, |
|
"loss": 0.3795, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3929858207702637, |
|
"rewards/margins": 2.559861660003662, |
|
"rewards/rejected": -4.952847003936768, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 18.149863459319363, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": -1.2258936166763306, |
|
"logits/rejected": -1.2951385974884033, |
|
"logps/chosen": -507.068359375, |
|
"logps/rejected": -806.07421875, |
|
"loss": 0.4145, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.737776279449463, |
|
"rewards/margins": 3.1096084117889404, |
|
"rewards/rejected": -5.847384452819824, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 22.723618390263812, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": -1.1216206550598145, |
|
"logits/rejected": -1.2867326736450195, |
|
"logps/chosen": -473.12408447265625, |
|
"logps/rejected": -896.2477416992188, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.6412878036499023, |
|
"rewards/margins": 4.341336727142334, |
|
"rewards/rejected": -6.982624053955078, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 19.527492956725595, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": -1.1790878772735596, |
|
"logits/rejected": -1.1905752420425415, |
|
"logps/chosen": -470.6045837402344, |
|
"logps/rejected": -782.1622924804688, |
|
"loss": 0.4267, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.268146514892578, |
|
"rewards/margins": 3.100985527038574, |
|
"rewards/rejected": -5.369132041931152, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 17.486447949626083, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": -1.1854205131530762, |
|
"logits/rejected": -1.268317461013794, |
|
"logps/chosen": -508.628173828125, |
|
"logps/rejected": -689.4181518554688, |
|
"loss": 0.4241, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.384272575378418, |
|
"rewards/margins": 2.0019237995147705, |
|
"rewards/rejected": -4.386197090148926, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 11.876302215183998, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": -1.2807575464248657, |
|
"logits/rejected": -1.3796226978302002, |
|
"logps/chosen": -456.07244873046875, |
|
"logps/rejected": -617.9744873046875, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.917088508605957, |
|
"rewards/margins": 2.016084909439087, |
|
"rewards/rejected": -3.933173418045044, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 13.157616210751735, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": -1.3769404888153076, |
|
"logits/rejected": -1.4445879459381104, |
|
"logps/chosen": -420.9883728027344, |
|
"logps/rejected": -629.974853515625, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0882909297943115, |
|
"rewards/margins": 1.72428297996521, |
|
"rewards/rejected": -3.8125743865966797, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 15.730225571388548, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": -1.1365947723388672, |
|
"logits/rejected": -1.275075912475586, |
|
"logps/chosen": -472.18408203125, |
|
"logps/rejected": -672.9304809570312, |
|
"loss": 0.4284, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8900762796401978, |
|
"rewards/margins": 2.209829807281494, |
|
"rewards/rejected": -4.099905967712402, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 35.145335130920884, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": -1.1994316577911377, |
|
"logits/rejected": -1.2499196529388428, |
|
"logps/chosen": -472.410888671875, |
|
"logps/rejected": -594.9708251953125, |
|
"loss": 0.4289, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9588143825531006, |
|
"rewards/margins": 1.4960181713104248, |
|
"rewards/rejected": -3.4548325538635254, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 15.811033639076967, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": -1.2232351303100586, |
|
"logits/rejected": -1.2345741987228394, |
|
"logps/chosen": -410.38848876953125, |
|
"logps/rejected": -693.65673828125, |
|
"loss": 0.4377, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.075925588607788, |
|
"rewards/margins": 2.4454538822174072, |
|
"rewards/rejected": -4.521379470825195, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 22.73378264594113, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": -1.103428602218628, |
|
"logits/rejected": -1.1875020265579224, |
|
"logps/chosen": -571.9773559570312, |
|
"logps/rejected": -778.9340209960938, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.662141799926758, |
|
"rewards/margins": 2.4210867881774902, |
|
"rewards/rejected": -5.083228588104248, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 25.696268435535774, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": -1.244616150856018, |
|
"logits/rejected": -1.353212594985962, |
|
"logps/chosen": -394.8204040527344, |
|
"logps/rejected": -692.91357421875, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8012546300888062, |
|
"rewards/margins": 2.963744640350342, |
|
"rewards/rejected": -4.764999866485596, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 18.050945946975368, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": -1.1310231685638428, |
|
"logits/rejected": -1.2564256191253662, |
|
"logps/chosen": -449.65362548828125, |
|
"logps/rejected": -771.5709838867188, |
|
"loss": 0.4261, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2246077060699463, |
|
"rewards/margins": 3.2700328826904297, |
|
"rewards/rejected": -5.494640350341797, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 9.799407812333103, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": -1.2643756866455078, |
|
"logits/rejected": -1.2907123565673828, |
|
"logps/chosen": -408.44683837890625, |
|
"logps/rejected": -748.5787353515625, |
|
"loss": 0.3526, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.775541067123413, |
|
"rewards/margins": 3.4335663318634033, |
|
"rewards/rejected": -5.209107398986816, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 19.977109925350412, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": -1.216590166091919, |
|
"logits/rejected": -1.3006147146224976, |
|
"logps/chosen": -467.8597717285156, |
|
"logps/rejected": -650.8781127929688, |
|
"loss": 0.4394, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0549581050872803, |
|
"rewards/margins": 1.685307502746582, |
|
"rewards/rejected": -3.7402656078338623, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 17.465911639708498, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": -1.2152674198150635, |
|
"logits/rejected": -1.3853117227554321, |
|
"logps/chosen": -531.36865234375, |
|
"logps/rejected": -838.9884033203125, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.4217429161071777, |
|
"rewards/margins": 3.4808337688446045, |
|
"rewards/rejected": -5.9025774002075195, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 18.484754479900506, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": -1.2535191774368286, |
|
"logits/rejected": -1.3943572044372559, |
|
"logps/chosen": -503.76898193359375, |
|
"logps/rejected": -699.9238891601562, |
|
"loss": 0.4074, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2457404136657715, |
|
"rewards/margins": 2.4922969341278076, |
|
"rewards/rejected": -4.738037109375, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 13.480118265934307, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": -1.1623866558074951, |
|
"logits/rejected": -1.1990493535995483, |
|
"logps/chosen": -440.8330993652344, |
|
"logps/rejected": -671.6934814453125, |
|
"loss": 0.4079, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2158052921295166, |
|
"rewards/margins": 2.1327455043792725, |
|
"rewards/rejected": -4.348550796508789, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 18.218781531194935, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": -1.2353599071502686, |
|
"logits/rejected": -1.2978723049163818, |
|
"logps/chosen": -485.01837158203125, |
|
"logps/rejected": -695.3224487304688, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.558899164199829, |
|
"rewards/margins": 1.8993895053863525, |
|
"rewards/rejected": -4.458288669586182, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 23.87409903985215, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": -1.1395564079284668, |
|
"logits/rejected": -1.2785004377365112, |
|
"logps/chosen": -447.79144287109375, |
|
"logps/rejected": -616.0286254882812, |
|
"loss": 0.435, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.080073356628418, |
|
"rewards/margins": 1.8376333713531494, |
|
"rewards/rejected": -3.917706251144409, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 20.86151843851289, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": -1.2566897869110107, |
|
"logits/rejected": -1.2571423053741455, |
|
"logps/chosen": -419.4219665527344, |
|
"logps/rejected": -851.1385498046875, |
|
"loss": 0.4659, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.008873701095581, |
|
"rewards/margins": 4.522359371185303, |
|
"rewards/rejected": -6.5312323570251465, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 13.169040672352676, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": -1.3603243827819824, |
|
"logits/rejected": -1.3747715950012207, |
|
"logps/chosen": -500.2078552246094, |
|
"logps/rejected": -785.7582397460938, |
|
"loss": 0.4021, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.25557279586792, |
|
"rewards/margins": 3.053633213043213, |
|
"rewards/rejected": -5.309205532073975, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 16.05314488934455, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": -1.318904995918274, |
|
"logits/rejected": -1.3817777633666992, |
|
"logps/chosen": -521.3937377929688, |
|
"logps/rejected": -800.31103515625, |
|
"loss": 0.4506, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6105079650878906, |
|
"rewards/margins": 2.589186668395996, |
|
"rewards/rejected": -5.199694633483887, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 14.261695457548557, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": -1.2088521718978882, |
|
"logits/rejected": -1.2967567443847656, |
|
"logps/chosen": -463.45794677734375, |
|
"logps/rejected": -668.6795654296875, |
|
"loss": 0.4028, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.936281442642212, |
|
"rewards/margins": 2.147418975830078, |
|
"rewards/rejected": -4.083700180053711, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 12.79782870680645, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": -1.2623844146728516, |
|
"logits/rejected": -1.3965818881988525, |
|
"logps/chosen": -520.1638793945312, |
|
"logps/rejected": -901.8109130859375, |
|
"loss": 0.3906, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.5400707721710205, |
|
"rewards/margins": 4.0481061935424805, |
|
"rewards/rejected": -6.588177680969238, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 16.59225411377178, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": -1.2642148733139038, |
|
"logits/rejected": -1.3561595678329468, |
|
"logps/chosen": -521.087890625, |
|
"logps/rejected": -813.130615234375, |
|
"loss": 0.4099, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.508223056793213, |
|
"rewards/margins": 3.2700836658477783, |
|
"rewards/rejected": -5.7783074378967285, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 14.993047424273575, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": -1.2957048416137695, |
|
"logits/rejected": -1.418428659439087, |
|
"logps/chosen": -458.10076904296875, |
|
"logps/rejected": -786.9931640625, |
|
"loss": 0.3831, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.213374137878418, |
|
"rewards/margins": 3.280181407928467, |
|
"rewards/rejected": -5.493556022644043, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 15.085926001703353, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": -1.2087162733078003, |
|
"logits/rejected": -1.2654608488082886, |
|
"logps/chosen": -449.6219177246094, |
|
"logps/rejected": -705.5459594726562, |
|
"loss": 0.385, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1014938354492188, |
|
"rewards/margins": 2.5265586376190186, |
|
"rewards/rejected": -4.628052711486816, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 13.169922978355546, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": -1.3274714946746826, |
|
"logits/rejected": -1.3772881031036377, |
|
"logps/chosen": -465.1910095214844, |
|
"logps/rejected": -624.9302368164062, |
|
"loss": 0.4013, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.1250336170196533, |
|
"rewards/margins": 1.715985894203186, |
|
"rewards/rejected": -3.841019868850708, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 27.82691688189915, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": -1.2468100786209106, |
|
"logits/rejected": -1.4290482997894287, |
|
"logps/chosen": -521.2555541992188, |
|
"logps/rejected": -781.6026000976562, |
|
"loss": 0.416, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.3911871910095215, |
|
"rewards/margins": 3.2505409717559814, |
|
"rewards/rejected": -5.641728401184082, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 15.737223970644676, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": -1.1637942790985107, |
|
"logits/rejected": -1.2922910451889038, |
|
"logps/chosen": -529.7451171875, |
|
"logps/rejected": -853.4421997070312, |
|
"loss": 0.3675, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.4741718769073486, |
|
"rewards/margins": 3.4323413372039795, |
|
"rewards/rejected": -5.906513214111328, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 13.061674320090848, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": -1.1912027597427368, |
|
"logits/rejected": -1.3324553966522217, |
|
"logps/chosen": -545.3333740234375, |
|
"logps/rejected": -860.1539306640625, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.197711944580078, |
|
"rewards/margins": 3.962009906768799, |
|
"rewards/rejected": -6.159722328186035, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 19.85934874401157, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": -1.3464608192443848, |
|
"logits/rejected": -1.4455270767211914, |
|
"logps/chosen": -404.1613464355469, |
|
"logps/rejected": -649.46044921875, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9079539775848389, |
|
"rewards/margins": 2.655972480773926, |
|
"rewards/rejected": -4.563926696777344, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 10.622344250001166, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": -1.340012788772583, |
|
"logits/rejected": -1.3097373247146606, |
|
"logps/chosen": -495.234375, |
|
"logps/rejected": -826.9255981445312, |
|
"loss": 0.4075, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.605710744857788, |
|
"rewards/margins": 3.028745174407959, |
|
"rewards/rejected": -5.634456157684326, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 15.132572988644833, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": -1.2543226480484009, |
|
"logits/rejected": -1.3029847145080566, |
|
"logps/chosen": -481.427734375, |
|
"logps/rejected": -770.0972900390625, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3580222129821777, |
|
"rewards/margins": 2.690171241760254, |
|
"rewards/rejected": -5.04819393157959, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 14.038257828539548, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": -1.1603384017944336, |
|
"logits/rejected": -1.213220238685608, |
|
"logps/chosen": -554.8784790039062, |
|
"logps/rejected": -800.203369140625, |
|
"loss": 0.4264, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7545363903045654, |
|
"rewards/margins": 2.177790403366089, |
|
"rewards/rejected": -4.932326793670654, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 14.786725967482855, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": -1.3400354385375977, |
|
"logits/rejected": -1.4116084575653076, |
|
"logps/chosen": -469.63165283203125, |
|
"logps/rejected": -946.8123779296875, |
|
"loss": 0.3953, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.487299680709839, |
|
"rewards/margins": 4.986563682556152, |
|
"rewards/rejected": -7.473863124847412, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 21.495135238551697, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": -1.2585008144378662, |
|
"logits/rejected": -1.3433778285980225, |
|
"logps/chosen": -421.2335510253906, |
|
"logps/rejected": -548.8929443359375, |
|
"loss": 0.4028, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.097749948501587, |
|
"rewards/margins": 1.2572873830795288, |
|
"rewards/rejected": -3.355037212371826, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 15.584784008274491, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": -1.1034057140350342, |
|
"logits/rejected": -1.2848924398422241, |
|
"logps/chosen": -590.9141845703125, |
|
"logps/rejected": -737.7780151367188, |
|
"loss": 0.4146, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.305182933807373, |
|
"rewards/margins": 2.196429491043091, |
|
"rewards/rejected": -4.501612663269043, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 12.577138913457466, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": -1.2216746807098389, |
|
"logits/rejected": -1.2335078716278076, |
|
"logps/chosen": -490.0227966308594, |
|
"logps/rejected": -685.375732421875, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2040867805480957, |
|
"rewards/margins": 2.2901692390441895, |
|
"rewards/rejected": -4.494256496429443, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 12.780423712977628, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": -1.2985970973968506, |
|
"logits/rejected": -1.2781016826629639, |
|
"logps/chosen": -527.1070556640625, |
|
"logps/rejected": -797.3029174804688, |
|
"loss": 0.4308, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.7410807609558105, |
|
"rewards/margins": 2.8199234008789062, |
|
"rewards/rejected": -5.561004638671875, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 25.453128411840094, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": -1.1118319034576416, |
|
"logits/rejected": -1.207648515701294, |
|
"logps/chosen": -549.860595703125, |
|
"logps/rejected": -970.7677612304688, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.535156488418579, |
|
"rewards/margins": 4.580714702606201, |
|
"rewards/rejected": -7.115871429443359, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 14.697372336337441, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": -1.2074692249298096, |
|
"logits/rejected": -1.22970449924469, |
|
"logps/chosen": -493.60443115234375, |
|
"logps/rejected": -733.1605224609375, |
|
"loss": 0.4486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2459022998809814, |
|
"rewards/margins": 2.387206792831421, |
|
"rewards/rejected": -4.633109092712402, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 19.145579102229355, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": -1.0796090364456177, |
|
"logits/rejected": -1.2019519805908203, |
|
"logps/chosen": -480.3756408691406, |
|
"logps/rejected": -558.4412841796875, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0972506999969482, |
|
"rewards/margins": 1.18562912940979, |
|
"rewards/rejected": -3.282879590988159, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 17.204779447658726, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": -1.2189116477966309, |
|
"logits/rejected": -1.2864820957183838, |
|
"logps/chosen": -446.82867431640625, |
|
"logps/rejected": -702.0382080078125, |
|
"loss": 0.4466, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8683035373687744, |
|
"rewards/margins": 2.547211170196533, |
|
"rewards/rejected": -4.415513515472412, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 14.516749938972369, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": -1.1446878910064697, |
|
"logits/rejected": -1.2078421115875244, |
|
"logps/chosen": -391.42791748046875, |
|
"logps/rejected": -725.2740478515625, |
|
"loss": 0.397, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.532698631286621, |
|
"rewards/margins": 3.396210193634033, |
|
"rewards/rejected": -4.928908348083496, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 13.196683844151892, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": -1.091759443283081, |
|
"logits/rejected": -1.2269001007080078, |
|
"logps/chosen": -532.867431640625, |
|
"logps/rejected": -730.4796142578125, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.9313502311706543, |
|
"rewards/margins": 1.9276530742645264, |
|
"rewards/rejected": -4.859004020690918, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 12.89862131476654, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": -1.1648555994033813, |
|
"logits/rejected": -1.3147923946380615, |
|
"logps/chosen": -521.7337646484375, |
|
"logps/rejected": -674.8948364257812, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.260921001434326, |
|
"rewards/margins": 2.081796646118164, |
|
"rewards/rejected": -4.34271764755249, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 14.173962004603878, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": -1.160355567932129, |
|
"logits/rejected": -1.185987949371338, |
|
"logps/chosen": -477.21728515625, |
|
"logps/rejected": -679.4395751953125, |
|
"loss": 0.4274, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9270728826522827, |
|
"rewards/margins": 2.1360950469970703, |
|
"rewards/rejected": -4.063167572021484, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 11.942462848386326, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": -1.2002038955688477, |
|
"logits/rejected": -1.324733018875122, |
|
"logps/chosen": -478.535400390625, |
|
"logps/rejected": -557.5496826171875, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8901561498641968, |
|
"rewards/margins": 1.0154350996017456, |
|
"rewards/rejected": -2.9055914878845215, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 20.5827327935098, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": -1.173332929611206, |
|
"logits/rejected": -1.2609224319458008, |
|
"logps/chosen": -360.0851745605469, |
|
"logps/rejected": -583.3133544921875, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4041774272918701, |
|
"rewards/margins": 2.1920089721679688, |
|
"rewards/rejected": -3.5961861610412598, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 10.471012375510664, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": -1.1107840538024902, |
|
"logits/rejected": -1.2250497341156006, |
|
"logps/chosen": -374.8684997558594, |
|
"logps/rejected": -664.1340942382812, |
|
"loss": 0.4038, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7912622690200806, |
|
"rewards/margins": 2.5201168060302734, |
|
"rewards/rejected": -4.3113789558410645, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 20.111970761124727, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": -1.0951917171478271, |
|
"logits/rejected": -1.1834380626678467, |
|
"logps/chosen": -451.64190673828125, |
|
"logps/rejected": -770.8523559570312, |
|
"loss": 0.3789, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0603652000427246, |
|
"rewards/margins": 3.2534384727478027, |
|
"rewards/rejected": -5.313803672790527, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 15.121338349842414, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": -1.051578402519226, |
|
"logits/rejected": -1.135506272315979, |
|
"logps/chosen": -485.54840087890625, |
|
"logps/rejected": -721.75439453125, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1253767013549805, |
|
"rewards/margins": 2.3960883617401123, |
|
"rewards/rejected": -4.521464824676514, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 15.152890802383466, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": -1.2348651885986328, |
|
"logits/rejected": -1.2165499925613403, |
|
"logps/chosen": -395.697509765625, |
|
"logps/rejected": -543.8568115234375, |
|
"loss": 0.3826, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7078378200531006, |
|
"rewards/margins": 1.5240461826324463, |
|
"rewards/rejected": -3.231884002685547, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 24.44597593565566, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": -1.1522376537322998, |
|
"logits/rejected": -1.2483961582183838, |
|
"logps/chosen": -406.069091796875, |
|
"logps/rejected": -749.8062133789062, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.753334403038025, |
|
"rewards/margins": 3.3889122009277344, |
|
"rewards/rejected": -5.142246246337891, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 15.075419291770242, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": -1.2388461828231812, |
|
"logits/rejected": -1.3630428314208984, |
|
"logps/chosen": -477.43585205078125, |
|
"logps/rejected": -610.2297973632812, |
|
"loss": 0.3969, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.715201735496521, |
|
"rewards/margins": 1.8291780948638916, |
|
"rewards/rejected": -3.544379711151123, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 11.602973232421764, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": -1.19898521900177, |
|
"logits/rejected": -1.2185986042022705, |
|
"logps/chosen": -449.32598876953125, |
|
"logps/rejected": -633.5596923828125, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1087489128112793, |
|
"rewards/margins": 1.4358699321746826, |
|
"rewards/rejected": -3.544618606567383, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 21.3005453219283, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": -1.1370158195495605, |
|
"logits/rejected": -1.1749649047851562, |
|
"logps/chosen": -402.0398254394531, |
|
"logps/rejected": -587.6260375976562, |
|
"loss": 0.413, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9864225387573242, |
|
"rewards/margins": 1.8402111530303955, |
|
"rewards/rejected": -3.8266334533691406, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 15.387678499543219, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": -1.2096471786499023, |
|
"logits/rejected": -1.3197107315063477, |
|
"logps/chosen": -445.0570373535156, |
|
"logps/rejected": -827.9861450195312, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9052565097808838, |
|
"rewards/margins": 4.059884071350098, |
|
"rewards/rejected": -5.965140342712402, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 24.644670995274364, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": -1.2481223344802856, |
|
"logits/rejected": -1.3282666206359863, |
|
"logps/chosen": -388.14715576171875, |
|
"logps/rejected": -630.1165161132812, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7502208948135376, |
|
"rewards/margins": 2.1968894004821777, |
|
"rewards/rejected": -3.9471099376678467, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 20.103907535930773, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": -1.1463677883148193, |
|
"logits/rejected": -1.3338615894317627, |
|
"logps/chosen": -351.9326477050781, |
|
"logps/rejected": -538.684814453125, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.522869348526001, |
|
"rewards/margins": 2.161909580230713, |
|
"rewards/rejected": -3.684778928756714, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 14.949192367966893, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": -1.2300177812576294, |
|
"logits/rejected": -1.345139741897583, |
|
"logps/chosen": -454.37628173828125, |
|
"logps/rejected": -743.2384033203125, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8625742197036743, |
|
"rewards/margins": 3.337106227874756, |
|
"rewards/rejected": -5.199681282043457, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 17.259398265317675, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": -1.2201354503631592, |
|
"logits/rejected": -1.259948492050171, |
|
"logps/chosen": -431.505126953125, |
|
"logps/rejected": -647.8377685546875, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0006277561187744, |
|
"rewards/margins": 1.8061168193817139, |
|
"rewards/rejected": -3.806744337081909, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 26.593038878856742, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": -1.1473274230957031, |
|
"logits/rejected": -1.1531964540481567, |
|
"logps/chosen": -367.77691650390625, |
|
"logps/rejected": -670.8995361328125, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.509234070777893, |
|
"rewards/margins": 2.8308663368225098, |
|
"rewards/rejected": -4.340100288391113, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 17.09126226618081, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": -1.2680007219314575, |
|
"logits/rejected": -1.3855565786361694, |
|
"logps/chosen": -459.4864807128906, |
|
"logps/rejected": -617.2786254882812, |
|
"loss": 0.4376, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9778245687484741, |
|
"rewards/margins": 1.9757041931152344, |
|
"rewards/rejected": -3.953528881072998, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 22.964518655362124, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": -1.0826637744903564, |
|
"logits/rejected": -1.218787431716919, |
|
"logps/chosen": -442.7049865722656, |
|
"logps/rejected": -622.1304931640625, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.082751750946045, |
|
"rewards/margins": 1.7740720510482788, |
|
"rewards/rejected": -3.8568243980407715, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 16.601339844348978, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": -1.2238892316818237, |
|
"logits/rejected": -1.2646934986114502, |
|
"logps/chosen": -454.4891052246094, |
|
"logps/rejected": -779.9896850585938, |
|
"loss": 0.4293, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8976598978042603, |
|
"rewards/margins": 3.114412784576416, |
|
"rewards/rejected": -5.012072563171387, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 17.11872715651324, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": -1.2767010927200317, |
|
"logits/rejected": -1.3807860612869263, |
|
"logps/chosen": -399.5985412597656, |
|
"logps/rejected": -693.5151977539062, |
|
"loss": 0.3976, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8493973016738892, |
|
"rewards/margins": 2.9295060634613037, |
|
"rewards/rejected": -4.778903484344482, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 16.6413432588174, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": -1.098815679550171, |
|
"logits/rejected": -1.1592333316802979, |
|
"logps/chosen": -470.98553466796875, |
|
"logps/rejected": -625.4569091796875, |
|
"loss": 0.3886, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.982060432434082, |
|
"rewards/margins": 1.7682058811187744, |
|
"rewards/rejected": -3.7502663135528564, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 11.891797748013655, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": -1.0697181224822998, |
|
"logits/rejected": -1.194319486618042, |
|
"logps/chosen": -475.51898193359375, |
|
"logps/rejected": -717.6954345703125, |
|
"loss": 0.3918, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0813956260681152, |
|
"rewards/margins": 2.7703046798706055, |
|
"rewards/rejected": -4.851700782775879, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 17.887300709912445, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": -1.2668213844299316, |
|
"logits/rejected": -1.4020304679870605, |
|
"logps/chosen": -410.6656188964844, |
|
"logps/rejected": -631.67041015625, |
|
"loss": 0.4465, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.830963373184204, |
|
"rewards/margins": 2.1420297622680664, |
|
"rewards/rejected": -3.9729931354522705, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 21.650200131935442, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": -1.0380961894989014, |
|
"logits/rejected": -1.1259255409240723, |
|
"logps/chosen": -538.32568359375, |
|
"logps/rejected": -709.1238403320312, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.206559896469116, |
|
"rewards/margins": 2.0453429222106934, |
|
"rewards/rejected": -4.2519025802612305, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 12.405877408803793, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": -1.2166404724121094, |
|
"logits/rejected": -1.2211335897445679, |
|
"logps/chosen": -480.22528076171875, |
|
"logps/rejected": -792.5941162109375, |
|
"loss": 0.3954, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.314863443374634, |
|
"rewards/margins": 2.8861892223358154, |
|
"rewards/rejected": -5.201052665710449, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 12.728466226110546, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": -1.1239063739776611, |
|
"logits/rejected": -1.1524550914764404, |
|
"logps/chosen": -412.11602783203125, |
|
"logps/rejected": -646.6449584960938, |
|
"loss": 0.3604, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.66254460811615, |
|
"rewards/margins": 2.2538654804229736, |
|
"rewards/rejected": -3.916410446166992, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 11.308755733612493, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": -1.2462382316589355, |
|
"logits/rejected": -1.2617241144180298, |
|
"logps/chosen": -481.52642822265625, |
|
"logps/rejected": -763.2169189453125, |
|
"loss": 0.3466, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.5226385593414307, |
|
"rewards/margins": 2.925380229949951, |
|
"rewards/rejected": -5.448019504547119, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 20.409261229736146, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": -1.1374547481536865, |
|
"logits/rejected": -1.3039876222610474, |
|
"logps/chosen": -499.5015563964844, |
|
"logps/rejected": -709.859375, |
|
"loss": 0.4434, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0284790992736816, |
|
"rewards/margins": 2.706104040145874, |
|
"rewards/rejected": -4.734583377838135, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 17.1782781112593, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": -1.1101362705230713, |
|
"logits/rejected": -1.2460038661956787, |
|
"logps/chosen": -517.1344604492188, |
|
"logps/rejected": -742.2302856445312, |
|
"loss": 0.4041, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.067178726196289, |
|
"rewards/margins": 2.329007625579834, |
|
"rewards/rejected": -4.396185874938965, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 30.065326876665342, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": -1.237917184829712, |
|
"logits/rejected": -1.3036408424377441, |
|
"logps/chosen": -539.2681274414062, |
|
"logps/rejected": -925.9050903320312, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.540894031524658, |
|
"rewards/margins": 3.8281638622283936, |
|
"rewards/rejected": -6.369057655334473, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 20.319800037171195, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": -1.231994390487671, |
|
"logits/rejected": -1.3546103239059448, |
|
"logps/chosen": -441.52520751953125, |
|
"logps/rejected": -568.5699462890625, |
|
"loss": 0.4091, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8595590591430664, |
|
"rewards/margins": 1.6289126873016357, |
|
"rewards/rejected": -3.488471508026123, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 17.420902765437226, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": -1.2463701963424683, |
|
"logits/rejected": -1.2932254076004028, |
|
"logps/chosen": -475.19195556640625, |
|
"logps/rejected": -830.6253051757812, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.176619052886963, |
|
"rewards/margins": 3.5470027923583984, |
|
"rewards/rejected": -5.7236223220825195, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 13.877437771957279, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": -1.245715618133545, |
|
"logits/rejected": -1.245986819267273, |
|
"logps/chosen": -431.06024169921875, |
|
"logps/rejected": -607.4310302734375, |
|
"loss": 0.4043, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.873225450515747, |
|
"rewards/margins": 1.7056442499160767, |
|
"rewards/rejected": -3.578869581222534, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 22.780099057725195, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": -1.3027960062026978, |
|
"logits/rejected": -1.3543469905853271, |
|
"logps/chosen": -500.34527587890625, |
|
"logps/rejected": -797.4141845703125, |
|
"loss": 0.3995, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.248213291168213, |
|
"rewards/margins": 2.695244312286377, |
|
"rewards/rejected": -4.943457126617432, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 18.879115732312695, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": -1.179421067237854, |
|
"logits/rejected": -1.2008545398712158, |
|
"logps/chosen": -445.45611572265625, |
|
"logps/rejected": -636.3636474609375, |
|
"loss": 0.3706, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.106031894683838, |
|
"rewards/margins": 1.8034794330596924, |
|
"rewards/rejected": -3.909511089324951, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 13.946836764722176, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": -1.1962236166000366, |
|
"logits/rejected": -1.1757996082305908, |
|
"logps/chosen": -434.50238037109375, |
|
"logps/rejected": -851.7151489257812, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2081894874572754, |
|
"rewards/margins": 3.6091766357421875, |
|
"rewards/rejected": -5.817366600036621, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 21.809897538914896, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": -1.3298779726028442, |
|
"logits/rejected": -1.2550171613693237, |
|
"logps/chosen": -448.08612060546875, |
|
"logps/rejected": -611.9649658203125, |
|
"loss": 0.3966, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8825676441192627, |
|
"rewards/margins": 1.1555382013320923, |
|
"rewards/rejected": -3.0381054878234863, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 13.467439618498904, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": -1.2014961242675781, |
|
"logits/rejected": -1.348229169845581, |
|
"logps/chosen": -491.67352294921875, |
|
"logps/rejected": -760.8187866210938, |
|
"loss": 0.3867, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.231884479522705, |
|
"rewards/margins": 3.0948691368103027, |
|
"rewards/rejected": -5.32675313949585, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 17.106585504517614, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": -1.239768624305725, |
|
"logits/rejected": -1.2870354652404785, |
|
"logps/chosen": -483.9998474121094, |
|
"logps/rejected": -888.3824462890625, |
|
"loss": 0.4068, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.029411554336548, |
|
"rewards/margins": 4.014785289764404, |
|
"rewards/rejected": -6.044196128845215, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 17.65443575752541, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": -1.2722991704940796, |
|
"logits/rejected": -1.2935806512832642, |
|
"logps/chosen": -389.75628662109375, |
|
"logps/rejected": -633.9022216796875, |
|
"loss": 0.3956, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7770601511001587, |
|
"rewards/margins": 2.264246702194214, |
|
"rewards/rejected": -4.041306495666504, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 12.612133126164304, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": -1.22406804561615, |
|
"logits/rejected": -1.323305606842041, |
|
"logps/chosen": -508.7425842285156, |
|
"logps/rejected": -782.00830078125, |
|
"loss": 0.338, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.409405469894409, |
|
"rewards/margins": 3.0094313621520996, |
|
"rewards/rejected": -5.418837070465088, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 13.294898316790011, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": -1.2985012531280518, |
|
"logits/rejected": -1.3380292654037476, |
|
"logps/chosen": -493.2757873535156, |
|
"logps/rejected": -623.1602783203125, |
|
"loss": 0.4384, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2983577251434326, |
|
"rewards/margins": 1.3695790767669678, |
|
"rewards/rejected": -3.6679370403289795, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 15.284137135135785, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": -1.2158329486846924, |
|
"logits/rejected": -1.3253077268600464, |
|
"logps/chosen": -508.41424560546875, |
|
"logps/rejected": -638.3739624023438, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.2684988975524902, |
|
"rewards/margins": 1.3235927820205688, |
|
"rewards/rejected": -3.5920920372009277, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 15.219945748745712, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": -1.1814640760421753, |
|
"logits/rejected": -1.213180422782898, |
|
"logps/chosen": -480.6612854003906, |
|
"logps/rejected": -714.3478393554688, |
|
"loss": 0.443, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8659789562225342, |
|
"rewards/margins": 2.4482996463775635, |
|
"rewards/rejected": -4.314279079437256, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 12.199705253251745, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": -1.1539068222045898, |
|
"logits/rejected": -1.2610228061676025, |
|
"logps/chosen": -470.3968811035156, |
|
"logps/rejected": -807.7662353515625, |
|
"loss": 0.3508, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2147834300994873, |
|
"rewards/margins": 3.456162214279175, |
|
"rewards/rejected": -5.6709465980529785, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 9.529613600157132, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": -1.2287517786026, |
|
"logits/rejected": -1.1175611019134521, |
|
"logps/chosen": -470.9747009277344, |
|
"logps/rejected": -738.1583251953125, |
|
"loss": 0.4044, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.675825595855713, |
|
"rewards/margins": 2.222598075866699, |
|
"rewards/rejected": -4.89842414855957, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 18.605428038684874, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": -1.2765130996704102, |
|
"logits/rejected": -1.2980254888534546, |
|
"logps/chosen": -460.11541748046875, |
|
"logps/rejected": -656.4251098632812, |
|
"loss": 0.4182, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.518393039703369, |
|
"rewards/margins": 2.0247464179992676, |
|
"rewards/rejected": -4.543139457702637, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 14.363883415576543, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": -1.3632985353469849, |
|
"logits/rejected": -1.3597389459609985, |
|
"logps/chosen": -513.4880981445312, |
|
"logps/rejected": -859.6558837890625, |
|
"loss": 0.4086, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.856234550476074, |
|
"rewards/margins": 3.326939821243286, |
|
"rewards/rejected": -6.183174133300781, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 24.170474346453393, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": -1.1727163791656494, |
|
"logits/rejected": -1.296337604522705, |
|
"logps/chosen": -467.4671325683594, |
|
"logps/rejected": -634.9149169921875, |
|
"loss": 0.4019, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.021527051925659, |
|
"rewards/margins": 1.6675231456756592, |
|
"rewards/rejected": -3.6890506744384766, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 15.145186840475311, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": -1.1688404083251953, |
|
"logits/rejected": -1.2660518884658813, |
|
"logps/chosen": -569.9762573242188, |
|
"logps/rejected": -844.1318359375, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3475515842437744, |
|
"rewards/margins": 2.774146556854248, |
|
"rewards/rejected": -5.121697902679443, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 20.980446798559257, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": -1.2156826257705688, |
|
"logits/rejected": -1.2687807083129883, |
|
"logps/chosen": -427.87774658203125, |
|
"logps/rejected": -791.0867309570312, |
|
"loss": 0.4573, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1100106239318848, |
|
"rewards/margins": 3.6505370140075684, |
|
"rewards/rejected": -5.7605485916137695, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 26.09657808054691, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": -1.0822070837020874, |
|
"logits/rejected": -1.1274657249450684, |
|
"logps/chosen": -565.4129028320312, |
|
"logps/rejected": -877.5753784179688, |
|
"loss": 0.3914, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3460755348205566, |
|
"rewards/margins": 3.1658713817596436, |
|
"rewards/rejected": -5.511946678161621, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 20.358263015517405, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": -1.2023911476135254, |
|
"logits/rejected": -1.3535006046295166, |
|
"logps/chosen": -511.30792236328125, |
|
"logps/rejected": -691.3760375976562, |
|
"loss": 0.3992, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.373955011367798, |
|
"rewards/margins": 1.693084716796875, |
|
"rewards/rejected": -4.0670390129089355, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 17.09466339113468, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": -1.2254010438919067, |
|
"logits/rejected": -1.3339544534683228, |
|
"logps/chosen": -497.1849670410156, |
|
"logps/rejected": -889.703125, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4439101219177246, |
|
"rewards/margins": 3.9405925273895264, |
|
"rewards/rejected": -6.384502410888672, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 17.680515818931642, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": -1.2316696643829346, |
|
"logits/rejected": -1.2435214519500732, |
|
"logps/chosen": -472.69000244140625, |
|
"logps/rejected": -706.5465087890625, |
|
"loss": 0.3977, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.054659605026245, |
|
"rewards/margins": 2.194859504699707, |
|
"rewards/rejected": -4.249519348144531, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 19.737454721261717, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": -1.2599509954452515, |
|
"logits/rejected": -1.2703198194503784, |
|
"logps/chosen": -364.81658935546875, |
|
"logps/rejected": -650.0493774414062, |
|
"loss": 0.3815, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5390068292617798, |
|
"rewards/margins": 2.7725987434387207, |
|
"rewards/rejected": -4.311606407165527, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 20.629402096960852, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": -1.2555500268936157, |
|
"logits/rejected": -1.2429850101470947, |
|
"logps/chosen": -496.2998046875, |
|
"logps/rejected": -772.2160034179688, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5762507915496826, |
|
"rewards/margins": 2.843451499938965, |
|
"rewards/rejected": -5.419702053070068, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 21.54194458199129, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": -1.3214588165283203, |
|
"logits/rejected": -1.3182449340820312, |
|
"logps/chosen": -515.1784057617188, |
|
"logps/rejected": -786.1739501953125, |
|
"loss": 0.4275, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.5369973182678223, |
|
"rewards/margins": 2.521944999694824, |
|
"rewards/rejected": -5.058941841125488, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 22.64402950848799, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": -1.2550867795944214, |
|
"logits/rejected": -1.322644591331482, |
|
"logps/chosen": -376.3187561035156, |
|
"logps/rejected": -727.2735595703125, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7436511516571045, |
|
"rewards/margins": 3.295630693435669, |
|
"rewards/rejected": -5.039282321929932, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 22.391181670910232, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": -1.2337408065795898, |
|
"logits/rejected": -1.2769973278045654, |
|
"logps/chosen": -468.5732421875, |
|
"logps/rejected": -767.9414672851562, |
|
"loss": 0.4017, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.362514019012451, |
|
"rewards/margins": 3.0355606079101562, |
|
"rewards/rejected": -5.398074150085449, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 14.32266939448474, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": -1.1863398551940918, |
|
"logits/rejected": -1.2951616048812866, |
|
"logps/chosen": -504.52801513671875, |
|
"logps/rejected": -719.0045166015625, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.333911895751953, |
|
"rewards/margins": 1.8691895008087158, |
|
"rewards/rejected": -4.203102111816406, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 23.976635069300592, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": -1.126481294631958, |
|
"logits/rejected": -1.2799094915390015, |
|
"logps/chosen": -538.2355346679688, |
|
"logps/rejected": -603.18994140625, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.318129062652588, |
|
"rewards/margins": 1.0544296503067017, |
|
"rewards/rejected": -3.372559070587158, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 40.95175275330808, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": -1.2519207000732422, |
|
"logits/rejected": -1.3095006942749023, |
|
"logps/chosen": -540.7717895507812, |
|
"logps/rejected": -778.435791015625, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.4916446208953857, |
|
"rewards/margins": 2.5904107093811035, |
|
"rewards/rejected": -5.08205509185791, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 17.066173818194596, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": -1.2024726867675781, |
|
"logits/rejected": -1.2957074642181396, |
|
"logps/chosen": -512.9348754882812, |
|
"logps/rejected": -842.3238525390625, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.164402961730957, |
|
"rewards/margins": 3.843069076538086, |
|
"rewards/rejected": -6.007472515106201, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 20.618667625472924, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": -1.2141971588134766, |
|
"logits/rejected": -1.2368704080581665, |
|
"logps/chosen": -452.35345458984375, |
|
"logps/rejected": -660.5663452148438, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.867018699645996, |
|
"rewards/margins": 2.0285303592681885, |
|
"rewards/rejected": -3.8955492973327637, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 21.52118343490649, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": -1.2614226341247559, |
|
"logits/rejected": -1.27662193775177, |
|
"logps/chosen": -454.05596923828125, |
|
"logps/rejected": -670.5745239257812, |
|
"loss": 0.4453, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.251619338989258, |
|
"rewards/margins": 2.0149483680725098, |
|
"rewards/rejected": -4.266567707061768, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4679888197991303, |
|
"train_runtime": 14179.2142, |
|
"train_samples_per_second": 9.406, |
|
"train_steps_per_second": 0.294 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|