diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6297 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 100, + "global_step": 4164, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0007204610951008645, + "grad_norm": 2.336021900177002, + "learning_rate": 1.199040767386091e-10, + "logits/chosen": -1.3860063552856445, + "logits/rejected": -1.3949532508850098, + "logps/chosen": -34.621925354003906, + "logps/rejected": -37.30891418457031, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.007204610951008645, + "grad_norm": 2.7957100868225098, + "learning_rate": 1.199040767386091e-09, + "logits/chosen": -1.5467724800109863, + "logits/rejected": -1.5282496213912964, + "logps/chosen": -42.52306365966797, + "logps/rejected": -44.5566520690918, + "loss": 0.6932, + "rewards/accuracies": 0.3611111044883728, + "rewards/chosen": -9.029280045069754e-05, + "rewards/margins": -2.0939776732120663e-05, + "rewards/rejected": -6.935300189070404e-05, + "step": 10 + }, + { + "epoch": 0.01440922190201729, + "grad_norm": 2.9324934482574463, + "learning_rate": 2.398081534772182e-09, + "logits/chosen": -1.555262804031372, + "logits/rejected": -1.5412877798080444, + "logps/chosen": -44.08427810668945, + "logps/rejected": -46.5708122253418, + "loss": 0.6933, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": 1.1827131856989581e-05, + "rewards/margins": -0.00023911210882943124, + "rewards/rejected": 0.0002509392215870321, + "step": 20 + }, + { + "epoch": 0.021613832853025938, + "grad_norm": 3.497398614883423, + "learning_rate": 3.597122302158273e-09, + "logits/chosen": -1.5116827487945557, + "logits/rejected": -1.50448739528656, + "logps/chosen": -47.85178756713867, + "logps/rejected": -50.80080795288086, + "loss": 0.6931, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -2.9822525903000496e-05, + "rewards/margins": 6.526964716613293e-05, + "rewards/rejected": -9.509220399195328e-05, + "step": 30 + }, + { + "epoch": 0.02881844380403458, + "grad_norm": 2.5853052139282227, + "learning_rate": 4.796163069544364e-09, + "logits/chosen": -1.5581772327423096, + "logits/rejected": -1.5541572570800781, + "logps/chosen": -43.06446838378906, + "logps/rejected": -45.565834045410156, + "loss": 0.6931, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": -0.00012712908210232854, + "rewards/margins": 5.798434722237289e-05, + "rewards/rejected": -0.00018511342932470143, + "step": 40 + }, + { + "epoch": 0.03602305475504323, + "grad_norm": 2.652137041091919, + "learning_rate": 5.995203836930456e-09, + "logits/chosen": -1.4693658351898193, + "logits/rejected": -1.4685413837432861, + "logps/chosen": -43.009254455566406, + "logps/rejected": -44.814476013183594, + "loss": 0.6932, + "rewards/accuracies": 0.4375, + "rewards/chosen": -2.008357478189282e-05, + "rewards/margins": -0.0001420602493453771, + "rewards/rejected": 0.00012197670002933592, + "step": 50 + }, + { + "epoch": 0.043227665706051875, + "grad_norm": 3.9296295642852783, + "learning_rate": 7.194244604316546e-09, + "logits/chosen": -1.5675886869430542, + "logits/rejected": -1.5608971118927002, + "logps/chosen": -50.68689727783203, + "logps/rejected": -52.0194206237793, + "loss": 0.6932, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -2.8326398933131713e-06, + "rewards/margins": -2.4851691705407575e-05, + "rewards/rejected": 2.201905044785235e-05, + "step": 60 + }, + { + "epoch": 0.05043227665706052, + "grad_norm": 2.2988357543945312, + "learning_rate": 8.393285371702639e-09, + "logits/chosen": -1.5360424518585205, + "logits/rejected": -1.5283145904541016, + "logps/chosen": -50.06494903564453, + "logps/rejected": -52.77583694458008, + "loss": 0.6931, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": 3.0180139219737612e-05, + "rewards/margins": 4.403649290907197e-05, + "rewards/rejected": -1.3856357327313162e-05, + "step": 70 + }, + { + "epoch": 0.05763688760806916, + "grad_norm": 3.470907211303711, + "learning_rate": 9.592326139088728e-09, + "logits/chosen": -1.5697886943817139, + "logits/rejected": -1.5619677305221558, + "logps/chosen": -51.1032600402832, + "logps/rejected": -52.691810607910156, + "loss": 0.6931, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": 8.321718632942066e-05, + "rewards/margins": 0.0001748651557136327, + "rewards/rejected": -9.164798393612728e-05, + "step": 80 + }, + { + "epoch": 0.06484149855907781, + "grad_norm": 2.783578872680664, + "learning_rate": 1.0791366906474819e-08, + "logits/chosen": -1.5033096075057983, + "logits/rejected": -1.5004870891571045, + "logps/chosen": -49.00882339477539, + "logps/rejected": -51.163333892822266, + "loss": 0.6931, + "rewards/accuracies": 0.53125, + "rewards/chosen": 7.983654359122738e-05, + "rewards/margins": 7.151851605158299e-06, + "rewards/rejected": 7.268470653798431e-05, + "step": 90 + }, + { + "epoch": 0.07204610951008646, + "grad_norm": 3.002304792404175, + "learning_rate": 1.1990407673860912e-08, + "logits/chosen": -1.5840990543365479, + "logits/rejected": -1.573439598083496, + "logps/chosen": -45.69633483886719, + "logps/rejected": -48.739601135253906, + "loss": 0.6933, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -5.749738193117082e-05, + "rewards/margins": -0.0002472072374075651, + "rewards/rejected": 0.0001897098554763943, + "step": 100 + }, + { + "epoch": 0.0792507204610951, + "grad_norm": 2.3070425987243652, + "learning_rate": 1.3189448441247003e-08, + "logits/chosen": -1.4549312591552734, + "logits/rejected": -1.4311813116073608, + "logps/chosen": -48.96226119995117, + "logps/rejected": -51.16582489013672, + "loss": 0.6933, + "rewards/accuracies": 0.40625, + "rewards/chosen": -0.00029989113681949675, + "rewards/margins": -0.0003156957100145519, + "rewards/rejected": 1.5804591384949163e-05, + "step": 110 + }, + { + "epoch": 0.08645533141210375, + "grad_norm": 2.3011035919189453, + "learning_rate": 1.4388489208633092e-08, + "logits/chosen": -1.483244776725769, + "logits/rejected": -1.4799872636795044, + "logps/chosen": -44.255714416503906, + "logps/rejected": -46.613468170166016, + "loss": 0.6931, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -2.3762720957165584e-05, + "rewards/margins": 0.00010847167868632823, + "rewards/rejected": -0.0001322343887295574, + "step": 120 + }, + { + "epoch": 0.0936599423631124, + "grad_norm": 3.2029104232788086, + "learning_rate": 1.5587529976019183e-08, + "logits/chosen": -1.5763423442840576, + "logits/rejected": -1.5709102153778076, + "logps/chosen": -49.423423767089844, + "logps/rejected": -51.306007385253906, + "loss": 0.6932, + "rewards/accuracies": 0.4312500059604645, + "rewards/chosen": -0.0001741496816975996, + "rewards/margins": -0.00012099805462639779, + "rewards/rejected": -5.31516270712018e-05, + "step": 130 + }, + { + "epoch": 0.10086455331412104, + "grad_norm": 2.831598997116089, + "learning_rate": 1.6786570743405277e-08, + "logits/chosen": -1.4523359537124634, + "logits/rejected": -1.4428811073303223, + "logps/chosen": -45.845314025878906, + "logps/rejected": -50.251155853271484, + "loss": 0.6931, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": 1.4822484445176087e-05, + "rewards/margins": 5.557260010391474e-05, + "rewards/rejected": -4.075012475368567e-05, + "step": 140 + }, + { + "epoch": 0.10806916426512968, + "grad_norm": 3.5805459022521973, + "learning_rate": 1.7985611510791365e-08, + "logits/chosen": -1.4715862274169922, + "logits/rejected": -1.464839220046997, + "logps/chosen": -48.3131103515625, + "logps/rejected": -51.4343147277832, + "loss": 0.6932, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.0001291931257583201, + "rewards/margins": -0.00011216111306566745, + "rewards/rejected": -1.7032030882546678e-05, + "step": 150 + }, + { + "epoch": 0.11527377521613832, + "grad_norm": 2.45888090133667, + "learning_rate": 1.9184652278177456e-08, + "logits/chosen": -1.5041993856430054, + "logits/rejected": -1.4858150482177734, + "logps/chosen": -41.26018524169922, + "logps/rejected": -44.56896209716797, + "loss": 0.6933, + "rewards/accuracies": 0.4437499940395355, + "rewards/chosen": -5.096942186355591e-05, + "rewards/margins": -0.00033451110357418656, + "rewards/rejected": 0.0002835417108144611, + "step": 160 + }, + { + "epoch": 0.12247838616714697, + "grad_norm": 3.1718838214874268, + "learning_rate": 2.038369304556355e-08, + "logits/chosen": -1.5168631076812744, + "logits/rejected": -1.4979842901229858, + "logps/chosen": -44.85700225830078, + "logps/rejected": -46.86701583862305, + "loss": 0.6932, + "rewards/accuracies": 0.5, + "rewards/chosen": -3.738096711458638e-05, + "rewards/margins": -0.00010323604510631412, + "rewards/rejected": 6.585508526768535e-05, + "step": 170 + }, + { + "epoch": 0.12968299711815562, + "grad_norm": 2.619306802749634, + "learning_rate": 2.1582733812949638e-08, + "logits/chosen": -1.5800260305404663, + "logits/rejected": -1.5681655406951904, + "logps/chosen": -45.09278106689453, + "logps/rejected": -46.82966995239258, + "loss": 0.6932, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -7.130586709536146e-06, + "rewards/margins": -0.00016551685985177755, + "rewards/rejected": 0.00015838624676689506, + "step": 180 + }, + { + "epoch": 0.13688760806916425, + "grad_norm": 2.9155962467193604, + "learning_rate": 2.278177458033573e-08, + "logits/chosen": -1.5894930362701416, + "logits/rejected": -1.5857088565826416, + "logps/chosen": -42.25670623779297, + "logps/rejected": -45.39856719970703, + "loss": 0.6932, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": 5.098475230624899e-05, + "rewards/margins": -8.445042112725787e-06, + "rewards/rejected": 5.9429770772112533e-05, + "step": 190 + }, + { + "epoch": 0.1440922190201729, + "grad_norm": 3.554685354232788, + "learning_rate": 2.3980815347721823e-08, + "logits/chosen": -1.5361117124557495, + "logits/rejected": -1.5294668674468994, + "logps/chosen": -43.47795486450195, + "logps/rejected": -47.10270309448242, + "loss": 0.6932, + "rewards/accuracies": 0.4375, + "rewards/chosen": 5.860309465788305e-05, + "rewards/margins": -0.0001218240795424208, + "rewards/rejected": 0.00018042718875221908, + "step": 200 + }, + { + "epoch": 0.15129682997118155, + "grad_norm": 3.053459405899048, + "learning_rate": 2.517985611510791e-08, + "logits/chosen": -1.565314531326294, + "logits/rejected": -1.554024338722229, + "logps/chosen": -43.0426025390625, + "logps/rejected": -43.413352966308594, + "loss": 0.6931, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -5.537036486202851e-05, + "rewards/margins": 6.648890121141449e-05, + "rewards/rejected": -0.00012185927334940061, + "step": 210 + }, + { + "epoch": 0.1585014409221902, + "grad_norm": 2.835756301879883, + "learning_rate": 2.6378896882494006e-08, + "logits/chosen": -1.4806112051010132, + "logits/rejected": -1.473852515220642, + "logps/chosen": -47.3744010925293, + "logps/rejected": -52.52124786376953, + "loss": 0.6933, + "rewards/accuracies": 0.41874998807907104, + "rewards/chosen": -0.00017219179426319897, + "rewards/margins": -0.0003234629984945059, + "rewards/rejected": 0.00015127118967939168, + "step": 220 + }, + { + "epoch": 0.16570605187319884, + "grad_norm": 2.5750908851623535, + "learning_rate": 2.7577937649880097e-08, + "logits/chosen": -1.5319396257400513, + "logits/rejected": -1.530428171157837, + "logps/chosen": -44.463134765625, + "logps/rejected": -48.242122650146484, + "loss": 0.6931, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 6.716211646562442e-05, + "rewards/margins": -5.54590940282651e-07, + "rewards/rejected": 6.771668267901987e-05, + "step": 230 + }, + { + "epoch": 0.1729106628242075, + "grad_norm": 3.010782480239868, + "learning_rate": 2.8776978417266184e-08, + "logits/chosen": -1.5795023441314697, + "logits/rejected": -1.5688108205795288, + "logps/chosen": -49.142967224121094, + "logps/rejected": -51.20656204223633, + "loss": 0.6931, + "rewards/accuracies": 0.5, + "rewards/chosen": -1.059528767655138e-05, + "rewards/margins": 1.4627486052631866e-05, + "rewards/rejected": -2.5222787371603772e-05, + "step": 240 + }, + { + "epoch": 0.18011527377521613, + "grad_norm": 4.015756130218506, + "learning_rate": 2.997601918465228e-08, + "logits/chosen": -1.4548447132110596, + "logits/rejected": -1.4441945552825928, + "logps/chosen": -49.71647644042969, + "logps/rejected": -50.771690368652344, + "loss": 0.6931, + "rewards/accuracies": 0.46875, + "rewards/chosen": -8.51520017022267e-05, + "rewards/margins": 1.9873681594617665e-05, + "rewards/rejected": -0.00010502567602088675, + "step": 250 + }, + { + "epoch": 0.1873198847262248, + "grad_norm": 3.1907923221588135, + "learning_rate": 3.1175059952038366e-08, + "logits/chosen": -1.4959896802902222, + "logits/rejected": -1.4902995824813843, + "logps/chosen": -49.98474884033203, + "logps/rejected": -51.45148468017578, + "loss": 0.693, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0002439332165522501, + "rewards/margins": 0.00025814835680648685, + "rewards/rejected": -1.4215113878890406e-05, + "step": 260 + }, + { + "epoch": 0.19452449567723343, + "grad_norm": 3.32533860206604, + "learning_rate": 3.237410071942446e-08, + "logits/chosen": -1.5843312740325928, + "logits/rejected": -1.5691900253295898, + "logps/chosen": -46.75273132324219, + "logps/rejected": -49.31193542480469, + "loss": 0.6931, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": 0.00015570121468044817, + "rewards/margins": 3.353106149006635e-05, + "rewards/rejected": 0.00012217015319038182, + "step": 270 + }, + { + "epoch": 0.2017291066282421, + "grad_norm": 2.922327756881714, + "learning_rate": 3.3573141486810555e-08, + "logits/chosen": -1.5491392612457275, + "logits/rejected": -1.5333701372146606, + "logps/chosen": -42.477779388427734, + "logps/rejected": -43.71261978149414, + "loss": 0.6932, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -7.194602221716195e-05, + "rewards/margins": -0.00013697049871552736, + "rewards/rejected": 6.502445467049256e-05, + "step": 280 + }, + { + "epoch": 0.20893371757925072, + "grad_norm": 2.781327724456787, + "learning_rate": 3.477218225419664e-08, + "logits/chosen": -1.5796802043914795, + "logits/rejected": -1.5691970586776733, + "logps/chosen": -44.49600601196289, + "logps/rejected": -45.78491973876953, + "loss": 0.6931, + "rewards/accuracies": 0.53125, + "rewards/chosen": 0.00016616811626590788, + "rewards/margins": 0.0001632832718314603, + "rewards/rejected": 2.884840114347753e-06, + "step": 290 + }, + { + "epoch": 0.21613832853025935, + "grad_norm": 3.15329909324646, + "learning_rate": 3.597122302158273e-08, + "logits/chosen": -1.5556309223175049, + "logits/rejected": -1.5389394760131836, + "logps/chosen": -48.224769592285156, + "logps/rejected": -50.68939971923828, + "loss": 0.6933, + "rewards/accuracies": 0.38749998807907104, + "rewards/chosen": -0.0002144512691302225, + "rewards/margins": -0.00030510686337947845, + "rewards/rejected": 9.065552876563743e-05, + "step": 300 + }, + { + "epoch": 0.22334293948126802, + "grad_norm": 3.011691093444824, + "learning_rate": 3.717026378896883e-08, + "logits/chosen": -1.4414619207382202, + "logits/rejected": -1.4318530559539795, + "logps/chosen": -48.020721435546875, + "logps/rejected": -50.055450439453125, + "loss": 0.6931, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": 0.00020195532124489546, + "rewards/margins": 0.00012811608030460775, + "rewards/rejected": 7.383924094028771e-05, + "step": 310 + }, + { + "epoch": 0.23054755043227665, + "grad_norm": 2.5438578128814697, + "learning_rate": 3.836930455635491e-08, + "logits/chosen": -1.5534722805023193, + "logits/rejected": -1.5385301113128662, + "logps/chosen": -47.08538055419922, + "logps/rejected": -51.54567337036133, + "loss": 0.6931, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": 0.0002954248629976064, + "rewards/margins": 9.292210597777739e-05, + "rewards/rejected": 0.0002025027060881257, + "step": 320 + }, + { + "epoch": 0.2377521613832853, + "grad_norm": 2.316570520401001, + "learning_rate": 3.9568345323741003e-08, + "logits/chosen": -1.5250051021575928, + "logits/rejected": -1.5185617208480835, + "logps/chosen": -50.739192962646484, + "logps/rejected": -49.54676055908203, + "loss": 0.6931, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": 0.00017470329476054758, + "rewards/margins": -3.749446477741003e-07, + "rewards/rejected": 0.00017507823940832168, + "step": 330 + }, + { + "epoch": 0.24495677233429394, + "grad_norm": 2.6795296669006348, + "learning_rate": 4.07673860911271e-08, + "logits/chosen": -1.5784777402877808, + "logits/rejected": -1.5679104328155518, + "logps/chosen": -51.091636657714844, + "logps/rejected": -52.1263542175293, + "loss": 0.693, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 5.405420597526245e-05, + "rewards/margins": 0.00031650811433792114, + "rewards/rejected": -0.0002624538610689342, + "step": 340 + }, + { + "epoch": 0.2521613832853026, + "grad_norm": 3.6704699993133545, + "learning_rate": 4.1966426858513185e-08, + "logits/chosen": -1.5095831155776978, + "logits/rejected": -1.504861831665039, + "logps/chosen": -45.61798858642578, + "logps/rejected": -48.589141845703125, + "loss": 0.6929, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.000504065363202244, + "rewards/margins": 0.00041280948789790273, + "rewards/rejected": 9.125589713221416e-05, + "step": 350 + }, + { + "epoch": 0.25936599423631124, + "grad_norm": 3.4911813735961914, + "learning_rate": 4.3165467625899276e-08, + "logits/chosen": -1.501556634902954, + "logits/rejected": -1.4911236763000488, + "logps/chosen": -53.90102005004883, + "logps/rejected": -56.54648971557617, + "loss": 0.693, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.00031928817043080926, + "rewards/margins": 0.00022553373128175735, + "rewards/rejected": 9.375448280479759e-05, + "step": 360 + }, + { + "epoch": 0.2665706051873199, + "grad_norm": 3.746173143386841, + "learning_rate": 4.4364508393285374e-08, + "logits/chosen": -1.482246994972229, + "logits/rejected": -1.4804664850234985, + "logps/chosen": -48.317405700683594, + "logps/rejected": -53.011993408203125, + "loss": 0.6931, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": 0.00026067602448165417, + "rewards/margins": 0.0001698151318123564, + "rewards/rejected": 9.08608635654673e-05, + "step": 370 + }, + { + "epoch": 0.2737752161383285, + "grad_norm": 2.481205701828003, + "learning_rate": 4.556354916067146e-08, + "logits/chosen": -1.5724612474441528, + "logits/rejected": -1.5653808116912842, + "logps/chosen": -47.00031661987305, + "logps/rejected": -48.018943786621094, + "loss": 0.6933, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": 3.1971517273632344e-06, + "rewards/margins": -0.0002721514902077615, + "rewards/rejected": 0.0002753486332949251, + "step": 380 + }, + { + "epoch": 0.28097982708933716, + "grad_norm": 2.9210126399993896, + "learning_rate": 4.676258992805755e-08, + "logits/chosen": -1.54493248462677, + "logits/rejected": -1.5351022481918335, + "logps/chosen": -48.16374969482422, + "logps/rejected": -51.50679397583008, + "loss": 0.693, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.000324452732456848, + "rewards/margins": 0.0003632038424257189, + "rewards/rejected": -3.8751088140998036e-05, + "step": 390 + }, + { + "epoch": 0.2881844380403458, + "grad_norm": 3.4040896892547607, + "learning_rate": 4.796163069544365e-08, + "logits/chosen": -1.5583593845367432, + "logits/rejected": -1.5532950162887573, + "logps/chosen": -44.89096450805664, + "logps/rejected": -46.113468170166016, + "loss": 0.6932, + "rewards/accuracies": 0.4375, + "rewards/chosen": 0.0001240587153006345, + "rewards/margins": -4.035348320030607e-05, + "rewards/rejected": 0.00016441218031104654, + "step": 400 + }, + { + "epoch": 0.2953890489913545, + "grad_norm": 3.631049871444702, + "learning_rate": 4.916067146282973e-08, + "logits/chosen": -1.5089349746704102, + "logits/rejected": -1.5059764385223389, + "logps/chosen": -47.52130889892578, + "logps/rejected": -49.62239456176758, + "loss": 0.6932, + "rewards/accuracies": 0.45625001192092896, + "rewards/chosen": -2.1968793589621782e-05, + "rewards/margins": -3.3516105759190395e-05, + "rewards/rejected": 1.1547293979674578e-05, + "step": 410 + }, + { + "epoch": 0.3025936599423631, + "grad_norm": 2.4457123279571533, + "learning_rate": 4.999992091672379e-08, + "logits/chosen": -1.4693686962127686, + "logits/rejected": -1.4796515703201294, + "logps/chosen": -45.61933135986328, + "logps/rejected": -48.9610710144043, + "loss": 0.693, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.0004673867952078581, + "rewards/margins": 0.000354716379661113, + "rewards/rejected": 0.0001126704373746179, + "step": 420 + }, + { + "epoch": 0.30979827089337175, + "grad_norm": 2.2836077213287354, + "learning_rate": 4.999851500573209e-08, + "logits/chosen": -1.4973236322402954, + "logits/rejected": -1.4976922273635864, + "logps/chosen": -46.055450439453125, + "logps/rejected": -46.19012451171875, + "loss": 0.6931, + "rewards/accuracies": 0.53125, + "rewards/chosen": 0.00042625871719792485, + "rewards/margins": 0.0001353234110865742, + "rewards/rejected": 0.00029093524790368974, + "step": 430 + }, + { + "epoch": 0.3170028818443804, + "grad_norm": 2.460236072540283, + "learning_rate": 4.999535180235972e-08, + "logits/chosen": -1.498327612876892, + "logits/rejected": -1.4901635646820068, + "logps/chosen": -46.00019454956055, + "logps/rejected": -49.427894592285156, + "loss": 0.693, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": 0.00046957674203440547, + "rewards/margins": 0.0002196793502662331, + "rewards/rejected": 0.00024989742087200284, + "step": 440 + }, + { + "epoch": 0.3242074927953891, + "grad_norm": 3.161519765853882, + "learning_rate": 4.9990431528966836e-08, + "logits/chosen": -1.5111868381500244, + "logits/rejected": -1.4901098012924194, + "logps/chosen": -53.20442581176758, + "logps/rejected": -51.29274368286133, + "loss": 0.6931, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.00018602630007080734, + "rewards/margins": 0.0001877523900475353, + "rewards/rejected": -1.7260724689549534e-06, + "step": 450 + }, + { + "epoch": 0.3314121037463977, + "grad_norm": 3.7941031455993652, + "learning_rate": 4.9983754531428326e-08, + "logits/chosen": -1.5165042877197266, + "logits/rejected": -1.4989763498306274, + "logps/chosen": -53.773658752441406, + "logps/rejected": -55.66585159301758, + "loss": 0.6928, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": 0.0006648501148447394, + "rewards/margins": 0.0006117599550634623, + "rewards/rejected": 5.309013067744672e-05, + "step": 460 + }, + { + "epoch": 0.33861671469740634, + "grad_norm": 3.734304189682007, + "learning_rate": 4.997532127910954e-08, + "logits/chosen": -1.5783549547195435, + "logits/rejected": -1.5489213466644287, + "logps/chosen": -52.63933563232422, + "logps/rejected": -53.201080322265625, + "loss": 0.693, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.0004433690046425909, + "rewards/margins": 0.0003627253754530102, + "rewards/rejected": 8.064367284532636e-05, + "step": 470 + }, + { + "epoch": 0.345821325648415, + "grad_norm": 3.6867148876190186, + "learning_rate": 4.996513236483331e-08, + "logits/chosen": -1.6470205783843994, + "logits/rejected": -1.6330623626708984, + "logps/chosen": -42.520477294921875, + "logps/rejected": -45.42660903930664, + "loss": 0.693, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.00032508961157873273, + "rewards/margins": 0.00028424913762137294, + "rewards/rejected": 4.084048487129621e-05, + "step": 480 + }, + { + "epoch": 0.3530259365994236, + "grad_norm": 4.27908992767334, + "learning_rate": 4.9953188504838225e-08, + "logits/chosen": -1.5245378017425537, + "logits/rejected": -1.5128180980682373, + "logps/chosen": -46.4404411315918, + "logps/rejected": -49.49384689331055, + "loss": 0.6931, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.0004937785561196506, + "rewards/margins": 1.8969347365782596e-05, + "rewards/rejected": 0.0004748092032968998, + "step": 490 + }, + { + "epoch": 0.36023054755043227, + "grad_norm": 2.822087049484253, + "learning_rate": 4.993949053872834e-08, + "logits/chosen": -1.5284955501556396, + "logits/rejected": -1.5052189826965332, + "logps/chosen": -42.6441535949707, + "logps/rejected": -45.895713806152344, + "loss": 0.6929, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.0006938868900761008, + "rewards/margins": 0.000535003375262022, + "rewards/rejected": 0.00015888357302173972, + "step": 500 + }, + { + "epoch": 0.36743515850144093, + "grad_norm": 2.8799259662628174, + "learning_rate": 4.9924039429414086e-08, + "logits/chosen": -1.639021873474121, + "logits/rejected": -1.6214443445205688, + "logps/chosen": -46.00009536743164, + "logps/rejected": -47.9588508605957, + "loss": 0.693, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": 0.0007414157735183835, + "rewards/margins": 0.00037929159589111805, + "rewards/rejected": 0.00036212411941960454, + "step": 510 + }, + { + "epoch": 0.3746397694524496, + "grad_norm": 3.5472185611724854, + "learning_rate": 4.990683626304467e-08, + "logits/chosen": -1.53446364402771, + "logits/rejected": -1.5293452739715576, + "logps/chosen": -53.89451217651367, + "logps/rejected": -56.04365921020508, + "loss": 0.6929, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.0006791441701352596, + "rewards/margins": 0.0004991428577341139, + "rewards/rejected": 0.00018000123964156955, + "step": 520 + }, + { + "epoch": 0.3818443804034582, + "grad_norm": 3.2031264305114746, + "learning_rate": 4.9887882248931646e-08, + "logits/chosen": -1.4586400985717773, + "logits/rejected": -1.4377648830413818, + "logps/chosen": -46.408958435058594, + "logps/rejected": -47.54657745361328, + "loss": 0.6929, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.0010369222145527601, + "rewards/margins": 0.0005174219841137528, + "rewards/rejected": 0.0005195002304390073, + "step": 530 + }, + { + "epoch": 0.38904899135446686, + "grad_norm": 3.1673731803894043, + "learning_rate": 4.986717871946393e-08, + "logits/chosen": -1.485285997390747, + "logits/rejected": -1.464300274848938, + "logps/chosen": -45.8732795715332, + "logps/rejected": -47.80138397216797, + "loss": 0.6928, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0009578888420946896, + "rewards/margins": 0.0006758830859325826, + "rewards/rejected": 0.00028200578526593745, + "step": 540 + }, + { + "epoch": 0.3962536023054755, + "grad_norm": 3.0882983207702637, + "learning_rate": 4.984472713001416e-08, + "logits/chosen": -1.4302603006362915, + "logits/rejected": -1.421942949295044, + "logps/chosen": -48.35163879394531, + "logps/rejected": -48.36573028564453, + "loss": 0.6928, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0009299576049670577, + "rewards/margins": 0.0007230864139273763, + "rewards/rejected": 0.00020687119103968143, + "step": 550 + }, + { + "epoch": 0.4034582132564842, + "grad_norm": 3.2402331829071045, + "learning_rate": 4.982052905883637e-08, + "logits/chosen": -1.573286533355713, + "logits/rejected": -1.5629457235336304, + "logps/chosen": -48.487220764160156, + "logps/rejected": -49.93341827392578, + "loss": 0.6929, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0006783484714105725, + "rewards/margins": 0.000492787454277277, + "rewards/rejected": 0.00018556095892563462, + "step": 560 + }, + { + "epoch": 0.4106628242074928, + "grad_norm": 2.9258902072906494, + "learning_rate": 4.979458620695505e-08, + "logits/chosen": -1.5526586771011353, + "logits/rejected": -1.5234354734420776, + "logps/chosen": -52.4578857421875, + "logps/rejected": -54.48802947998047, + "loss": 0.6927, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0012387813767418265, + "rewards/margins": 0.0009542113984934986, + "rewards/rejected": 0.0002845699491444975, + "step": 570 + }, + { + "epoch": 0.41786743515850144, + "grad_norm": 3.176051139831543, + "learning_rate": 4.976690039804555e-08, + "logits/chosen": -1.5767595767974854, + "logits/rejected": -1.563197374343872, + "logps/chosen": -42.638832092285156, + "logps/rejected": -44.07222366333008, + "loss": 0.693, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0005800885264761746, + "rewards/margins": 0.0003046609926968813, + "rewards/rejected": 0.00027542750467546284, + "step": 580 + }, + { + "epoch": 0.4250720461095101, + "grad_norm": 2.7319722175598145, + "learning_rate": 4.973747357830592e-08, + "logits/chosen": -1.5270984172821045, + "logits/rejected": -1.525622010231018, + "logps/chosen": -47.51670837402344, + "logps/rejected": -53.14258575439453, + "loss": 0.6927, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.0010576354106888175, + "rewards/margins": 0.0008552650106139481, + "rewards/rejected": 0.0002023702982114628, + "step": 590 + }, + { + "epoch": 0.4322766570605187, + "grad_norm": 2.7828357219696045, + "learning_rate": 4.970630781632009e-08, + "logits/chosen": -1.6297931671142578, + "logits/rejected": -1.6194136142730713, + "logps/chosen": -45.408538818359375, + "logps/rejected": -49.090171813964844, + "loss": 0.6927, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0008878445369191468, + "rewards/margins": 0.0009432813385501504, + "rewards/rejected": -5.54367907170672e-05, + "step": 600 + }, + { + "epoch": 0.43948126801152737, + "grad_norm": 3.927708148956299, + "learning_rate": 4.967340530291242e-08, + "logits/chosen": -1.534347414970398, + "logits/rejected": -1.5172450542449951, + "logps/chosen": -50.444236755371094, + "logps/rejected": -51.08340835571289, + "loss": 0.6929, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.0008061706321313977, + "rewards/margins": 0.00048699689796194434, + "rewards/rejected": 0.00031917367596179247, + "step": 610 + }, + { + "epoch": 0.44668587896253603, + "grad_norm": 2.691185235977173, + "learning_rate": 4.9638768350993755e-08, + "logits/chosen": -1.5679445266723633, + "logits/rejected": -1.553625464439392, + "logps/chosen": -42.3968505859375, + "logps/rejected": -44.43559265136719, + "loss": 0.693, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.0008928319439291954, + "rewards/margins": 0.0002728732652030885, + "rewards/rejected": 0.000619958620518446, + "step": 620 + }, + { + "epoch": 0.4538904899135447, + "grad_norm": 2.364649534225464, + "learning_rate": 4.9602399395398786e-08, + "logits/chosen": -1.570885419845581, + "logits/rejected": -1.5635267496109009, + "logps/chosen": -43.05287551879883, + "logps/rejected": -46.550086975097656, + "loss": 0.6928, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0010358591098338366, + "rewards/margins": 0.000793623854406178, + "rewards/rejected": 0.00024223529908340424, + "step": 630 + }, + { + "epoch": 0.4610951008645533, + "grad_norm": 2.915830612182617, + "learning_rate": 4.9564300992714914e-08, + "logits/chosen": -1.428993821144104, + "logits/rejected": -1.4243650436401367, + "logps/chosen": -45.36574935913086, + "logps/rejected": -48.004005432128906, + "loss": 0.6927, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.00126446015201509, + "rewards/margins": 0.0009202055516652763, + "rewards/rejected": 0.00034425462945364416, + "step": 640 + }, + { + "epoch": 0.46829971181556196, + "grad_norm": 3.4686129093170166, + "learning_rate": 4.952447582110253e-08, + "logits/chosen": -1.6131556034088135, + "logits/rejected": -1.5842258930206299, + "logps/chosen": -45.44933319091797, + "logps/rejected": -45.383323669433594, + "loss": 0.6929, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.001398588763549924, + "rewards/margins": 0.0005807363195344806, + "rewards/rejected": 0.0008178524440154433, + "step": 650 + }, + { + "epoch": 0.4755043227665706, + "grad_norm": 3.4266490936279297, + "learning_rate": 4.948292668010676e-08, + "logits/chosen": -1.5424816608428955, + "logits/rejected": -1.54043447971344, + "logps/chosen": -47.142127990722656, + "logps/rejected": -50.01351547241211, + "loss": 0.6926, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0012382261920720339, + "rewards/margins": 0.0010371087118983269, + "rewards/rejected": 0.0002011175238294527, + "step": 660 + }, + { + "epoch": 0.4827089337175792, + "grad_norm": 3.418168306350708, + "learning_rate": 4.943965649046064e-08, + "logits/chosen": -1.5016462802886963, + "logits/rejected": -1.4743115901947021, + "logps/chosen": -49.834571838378906, + "logps/rejected": -51.15303039550781, + "loss": 0.6929, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.000952904112637043, + "rewards/margins": 0.0004903805674985051, + "rewards/rejected": 0.00046252348693087697, + "step": 670 + }, + { + "epoch": 0.4899135446685879, + "grad_norm": 4.634679794311523, + "learning_rate": 4.9394668293879835e-08, + "logits/chosen": -1.4445552825927734, + "logits/rejected": -1.4312325716018677, + "logps/chosen": -49.70585250854492, + "logps/rejected": -49.58285903930664, + "loss": 0.6926, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0017881987150758505, + "rewards/margins": 0.0011380156502127647, + "rewards/rejected": 0.0006501831230707467, + "step": 680 + }, + { + "epoch": 0.49711815561959655, + "grad_norm": 3.3351666927337646, + "learning_rate": 4.93479652528488e-08, + "logits/chosen": -1.5312552452087402, + "logits/rejected": -1.5206135511398315, + "logps/chosen": -47.807212829589844, + "logps/rejected": -50.62922286987305, + "loss": 0.6926, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0014403645182028413, + "rewards/margins": 0.0011131230276077986, + "rewards/rejected": 0.00032724151969887316, + "step": 690 + }, + { + "epoch": 0.5043227665706052, + "grad_norm": 2.796025276184082, + "learning_rate": 4.929955065039848e-08, + "logits/chosen": -1.5448652505874634, + "logits/rejected": -1.5314280986785889, + "logps/chosen": -46.463924407958984, + "logps/rejected": -49.261375427246094, + "loss": 0.6927, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0013490957207977772, + "rewards/margins": 0.000978103606030345, + "rewards/rejected": 0.0003709921729750931, + "step": 700 + }, + { + "epoch": 0.5115273775216138, + "grad_norm": 2.809128761291504, + "learning_rate": 4.92494278898755e-08, + "logits/chosen": -1.525039792060852, + "logits/rejected": -1.5088342428207397, + "logps/chosen": -41.34796905517578, + "logps/rejected": -43.392948150634766, + "loss": 0.6927, + "rewards/accuracies": 0.53125, + "rewards/chosen": 0.0013073389418423176, + "rewards/margins": 0.0009176974999718368, + "rewards/rejected": 0.0003896414418704808, + "step": 710 + }, + { + "epoch": 0.5187319884726225, + "grad_norm": 3.328831911087036, + "learning_rate": 4.9197600494702955e-08, + "logits/chosen": -1.4957438707351685, + "logits/rejected": -1.4807456731796265, + "logps/chosen": -49.30406951904297, + "logps/rejected": -52.457130432128906, + "loss": 0.6927, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.001288148807361722, + "rewards/margins": 0.0009355359943583608, + "rewards/rejected": 0.0003526128421071917, + "step": 720 + }, + { + "epoch": 0.5259365994236311, + "grad_norm": 2.870708465576172, + "learning_rate": 4.9144072108132725e-08, + "logits/chosen": -1.5102272033691406, + "logits/rejected": -1.490850806236267, + "logps/chosen": -48.92375946044922, + "logps/rejected": -51.040565490722656, + "loss": 0.6928, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.0015529112424701452, + "rewards/margins": 0.0007295840186998248, + "rewards/rejected": 0.0008233273401856422, + "step": 730 + }, + { + "epoch": 0.5331412103746398, + "grad_norm": 2.905596971511841, + "learning_rate": 4.908884649298937e-08, + "logits/chosen": -1.5039650201797485, + "logits/rejected": -1.500274419784546, + "logps/chosen": -46.73409652709961, + "logps/rejected": -46.288230895996094, + "loss": 0.6929, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.0010712891817092896, + "rewards/margins": 0.0005624311743304133, + "rewards/rejected": 0.000508858182001859, + "step": 740 + }, + { + "epoch": 0.5403458213256485, + "grad_norm": 2.8765416145324707, + "learning_rate": 4.903192753140557e-08, + "logits/chosen": -1.5269978046417236, + "logits/rejected": -1.5104032754898071, + "logps/chosen": -48.90652084350586, + "logps/rejected": -50.098411560058594, + "loss": 0.6924, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.001532660680823028, + "rewards/margins": 0.0014079209649935365, + "rewards/rejected": 0.0001247395994141698, + "step": 750 + }, + { + "epoch": 0.547550432276657, + "grad_norm": 3.3323886394500732, + "learning_rate": 4.897331922454931e-08, + "logits/chosen": -1.453380823135376, + "logits/rejected": -1.4518440961837769, + "logps/chosen": -45.53496551513672, + "logps/rejected": -48.62436294555664, + "loss": 0.6927, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.0013867730740457773, + "rewards/margins": 0.000980939483270049, + "rewards/rejected": 0.0004058335907757282, + "step": 760 + }, + { + "epoch": 0.5547550432276657, + "grad_norm": 3.25243878364563, + "learning_rate": 4.891302569234256e-08, + "logits/chosen": -1.4739805459976196, + "logits/rejected": -1.467893362045288, + "logps/chosen": -43.22185134887695, + "logps/rejected": -45.91840744018555, + "loss": 0.6923, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.0018970195669680834, + "rewards/margins": 0.001757220714353025, + "rewards/rejected": 0.00013979877985548228, + "step": 770 + }, + { + "epoch": 0.5619596541786743, + "grad_norm": 2.809793710708618, + "learning_rate": 4.8851051173171656e-08, + "logits/chosen": -1.4990657567977905, + "logits/rejected": -1.4895389080047607, + "logps/chosen": -48.396583557128906, + "logps/rejected": -50.17659378051758, + "loss": 0.6926, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0017727743834257126, + "rewards/margins": 0.0011530198389664292, + "rewards/rejected": 0.0006197548937052488, + "step": 780 + }, + { + "epoch": 0.569164265129683, + "grad_norm": 2.892955780029297, + "learning_rate": 4.87874000235894e-08, + "logits/chosen": -1.5470690727233887, + "logits/rejected": -1.5371184349060059, + "logps/chosen": -49.8763542175293, + "logps/rejected": -53.440277099609375, + "loss": 0.6924, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.0020356441382318735, + "rewards/margins": 0.001580337411724031, + "rewards/rejected": 0.0004553066974040121, + "step": 790 + }, + { + "epoch": 0.5763688760806917, + "grad_norm": 3.3299455642700195, + "learning_rate": 4.872207671800876e-08, + "logits/chosen": -1.5248336791992188, + "logits/rejected": -1.5134174823760986, + "logps/chosen": -46.861637115478516, + "logps/rejected": -47.89669418334961, + "loss": 0.6924, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.0019999851938337088, + "rewards/margins": 0.0015228716656565666, + "rewards/rejected": 0.0004771137028001249, + "step": 800 + }, + { + "epoch": 0.5835734870317003, + "grad_norm": 2.6876394748687744, + "learning_rate": 4.865508584838841e-08, + "logits/chosen": -1.5174375772476196, + "logits/rejected": -1.5208656787872314, + "logps/chosen": -44.744140625, + "logps/rejected": -47.87763214111328, + "loss": 0.6926, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.00140316691249609, + "rewards/margins": 0.0010685885790735483, + "rewards/rejected": 0.000334578420734033, + "step": 810 + }, + { + "epoch": 0.590778097982709, + "grad_norm": 2.7165634632110596, + "learning_rate": 4.858643212390985e-08, + "logits/chosen": -1.5524417161941528, + "logits/rejected": -1.5306005477905273, + "logps/chosen": -46.926429748535156, + "logps/rejected": -47.55287170410156, + "loss": 0.6924, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.0020128923933953047, + "rewards/margins": 0.001531012705527246, + "rewards/rejected": 0.0004818797460757196, + "step": 820 + }, + { + "epoch": 0.5979827089337176, + "grad_norm": 2.6394128799438477, + "learning_rate": 4.851612037064643e-08, + "logits/chosen": -1.510181188583374, + "logits/rejected": -1.5031936168670654, + "logps/chosen": -41.7721061706543, + "logps/rejected": -44.63286590576172, + "loss": 0.6923, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0025131129659712315, + "rewards/margins": 0.0017323486972600222, + "rewards/rejected": 0.000780764443334192, + "step": 830 + }, + { + "epoch": 0.6051873198847262, + "grad_norm": 2.2611138820648193, + "learning_rate": 4.8444155531224065e-08, + "logits/chosen": -1.519513487815857, + "logits/rejected": -1.5121686458587646, + "logps/chosen": -47.21880340576172, + "logps/rejected": -47.49650955200195, + "loss": 0.6923, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0021171828266233206, + "rewards/margins": 0.0016254640650004148, + "rewards/rejected": 0.0004917188780382276, + "step": 840 + }, + { + "epoch": 0.6123919308357348, + "grad_norm": 3.8291754722595215, + "learning_rate": 4.8370542664473805e-08, + "logits/chosen": -1.52823805809021, + "logits/rejected": -1.5173234939575195, + "logps/chosen": -47.172088623046875, + "logps/rejected": -50.45137023925781, + "loss": 0.6922, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.0022985998075455427, + "rewards/margins": 0.0019912240095436573, + "rewards/rejected": 0.00030737603083252907, + "step": 850 + }, + { + "epoch": 0.6195965417867435, + "grad_norm": 2.788311719894409, + "learning_rate": 4.829528694507624e-08, + "logits/chosen": -1.5345584154129028, + "logits/rejected": -1.5193954706192017, + "logps/chosen": -56.846717834472656, + "logps/rejected": -56.777488708496094, + "loss": 0.6922, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.002310269046574831, + "rewards/margins": 0.0019274738151580095, + "rewards/rejected": 0.0003827956388704479, + "step": 860 + }, + { + "epoch": 0.6268011527377522, + "grad_norm": 3.0898354053497314, + "learning_rate": 4.821839366319768e-08, + "logits/chosen": -1.5738890171051025, + "logits/rejected": -1.5631177425384521, + "logps/chosen": -47.592525482177734, + "logps/rejected": -50.608699798583984, + "loss": 0.6922, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.001986520830541849, + "rewards/margins": 0.0018442177679389715, + "rewards/rejected": 0.00014230303349904716, + "step": 870 + }, + { + "epoch": 0.6340057636887608, + "grad_norm": 3.0527920722961426, + "learning_rate": 4.813986822411833e-08, + "logits/chosen": -1.594681978225708, + "logits/rejected": -1.586861491203308, + "logps/chosen": -46.469261169433594, + "logps/rejected": -47.6103630065918, + "loss": 0.6924, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.0018088910728693008, + "rewards/margins": 0.0015493319369852543, + "rewards/rejected": 0.0002595590485725552, + "step": 880 + }, + { + "epoch": 0.6412103746397695, + "grad_norm": 2.957890748977661, + "learning_rate": 4.805971614785231e-08, + "logits/chosen": -1.5932258367538452, + "logits/rejected": -1.5828759670257568, + "logps/chosen": -44.192230224609375, + "logps/rejected": -45.84956741333008, + "loss": 0.6922, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.002317091915756464, + "rewards/margins": 0.0019270635675638914, + "rewards/rejected": 0.0003900282899849117, + "step": 890 + }, + { + "epoch": 0.6484149855907781, + "grad_norm": 3.1973183155059814, + "learning_rate": 4.797794306875963e-08, + "logits/chosen": -1.4426988363265991, + "logits/rejected": -1.4457299709320068, + "logps/chosen": -52.90196990966797, + "logps/rejected": -56.030479431152344, + "loss": 0.6924, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.0017495963256806135, + "rewards/margins": 0.0014681669417768717, + "rewards/rejected": 0.0002814295585267246, + "step": 900 + }, + { + "epoch": 0.6556195965417867, + "grad_norm": 3.1332459449768066, + "learning_rate": 4.7894554735150076e-08, + "logits/chosen": -1.4939850568771362, + "logits/rejected": -1.486452579498291, + "logps/chosen": -50.417274475097656, + "logps/rejected": -51.93208694458008, + "loss": 0.6925, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.001817676005885005, + "rewards/margins": 0.001303942990489304, + "rewards/rejected": 0.0005137331318110228, + "step": 910 + }, + { + "epoch": 0.6628242074927954, + "grad_norm": 2.500265598297119, + "learning_rate": 4.7809557008879185e-08, + "logits/chosen": -1.5261718034744263, + "logits/rejected": -1.5143718719482422, + "logps/chosen": -42.04478073120117, + "logps/rejected": -43.99352264404297, + "loss": 0.6919, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.0026101425755769014, + "rewards/margins": 0.0025680093094706535, + "rewards/rejected": 4.213328429614194e-05, + "step": 920 + }, + { + "epoch": 0.670028818443804, + "grad_norm": 3.3175556659698486, + "learning_rate": 4.772295586493613e-08, + "logits/chosen": -1.5920884609222412, + "logits/rejected": -1.5787022113800049, + "logps/chosen": -46.341896057128906, + "logps/rejected": -48.80807113647461, + "loss": 0.6921, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.002759728580713272, + "rewards/margins": 0.0021550802048295736, + "rewards/rejected": 0.0006046487251296639, + "step": 930 + }, + { + "epoch": 0.6772334293948127, + "grad_norm": 2.305763006210327, + "learning_rate": 4.763475739102374e-08, + "logits/chosen": -1.473327398300171, + "logits/rejected": -1.468834400177002, + "logps/chosen": -54.86418914794922, + "logps/rejected": -55.64992141723633, + "loss": 0.692, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.0026435963809490204, + "rewards/margins": 0.002390109235420823, + "rewards/rejected": 0.0002534873492550105, + "step": 940 + }, + { + "epoch": 0.6844380403458213, + "grad_norm": 2.9104561805725098, + "learning_rate": 4.754496778713054e-08, + "logits/chosen": -1.4290201663970947, + "logits/rejected": -1.442920446395874, + "logps/chosen": -46.24345397949219, + "logps/rejected": -50.873817443847656, + "loss": 0.6924, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.002895154058933258, + "rewards/margins": 0.0014690979151055217, + "rewards/rejected": 0.0014260562602430582, + "step": 950 + }, + { + "epoch": 0.69164265129683, + "grad_norm": 2.9715263843536377, + "learning_rate": 4.7453593365094926e-08, + "logits/chosen": -1.564841866493225, + "logits/rejected": -1.5565474033355713, + "logps/chosen": -48.927528381347656, + "logps/rejected": -51.33686065673828, + "loss": 0.6923, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0019558072090148926, + "rewards/margins": 0.001724687055684626, + "rewards/rejected": 0.00023112029884941876, + "step": 960 + }, + { + "epoch": 0.6988472622478387, + "grad_norm": 3.6742053031921387, + "learning_rate": 4.736064054816145e-08, + "logits/chosen": -1.5793306827545166, + "logits/rejected": -1.5710303783416748, + "logps/chosen": -44.4240837097168, + "logps/rejected": -47.71710968017578, + "loss": 0.6918, + "rewards/accuracies": 0.6937500238418579, + "rewards/chosen": 0.003227539826184511, + "rewards/margins": 0.0027411046903580427, + "rewards/rejected": 0.00048643528134562075, + "step": 970 + }, + { + "epoch": 0.7060518731988472, + "grad_norm": 2.770151376724243, + "learning_rate": 4.726611587052933e-08, + "logits/chosen": -1.430496335029602, + "logits/rejected": -1.4332740306854248, + "logps/chosen": -50.69718551635742, + "logps/rejected": -55.78377151489258, + "loss": 0.6925, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.002387463580816984, + "rewards/margins": 0.0013439650647342205, + "rewards/rejected": 0.00104349828325212, + "step": 980 + }, + { + "epoch": 0.7132564841498559, + "grad_norm": 3.8736650943756104, + "learning_rate": 4.71700259768931e-08, + "logits/chosen": -1.5389162302017212, + "logits/rejected": -1.5327080488204956, + "logps/chosen": -50.4788932800293, + "logps/rejected": -51.98632049560547, + "loss": 0.6923, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": 0.0025045094080269337, + "rewards/margins": 0.0018028088379651308, + "rewards/rejected": 0.0007017005700618029, + "step": 990 + }, + { + "epoch": 0.7204610951008645, + "grad_norm": 2.781522035598755, + "learning_rate": 4.707237762197549e-08, + "logits/chosen": -1.5228068828582764, + "logits/rejected": -1.5111085176467896, + "logps/chosen": -47.027870178222656, + "logps/rejected": -49.01156997680664, + "loss": 0.6923, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0033112752716988325, + "rewards/margins": 0.0017588225891813636, + "rewards/rejected": 0.0015524530317634344, + "step": 1000 + }, + { + "epoch": 0.7276657060518732, + "grad_norm": 3.929330825805664, + "learning_rate": 4.697317767005265e-08, + "logits/chosen": -1.5303703546524048, + "logits/rejected": -1.5189971923828125, + "logps/chosen": -43.00761795043945, + "logps/rejected": -44.80951690673828, + "loss": 0.692, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0029724142514169216, + "rewards/margins": 0.0022910248953849077, + "rewards/rejected": 0.0006813893560320139, + "step": 1010 + }, + { + "epoch": 0.7348703170028819, + "grad_norm": 2.842343807220459, + "learning_rate": 4.6872433094471577e-08, + "logits/chosen": -1.548322319984436, + "logits/rejected": -1.5355441570281982, + "logps/chosen": -46.55397415161133, + "logps/rejected": -48.415069580078125, + "loss": 0.6923, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.0027256556786596775, + "rewards/margins": 0.001606556586921215, + "rewards/rejected": 0.0011190990917384624, + "step": 1020 + }, + { + "epoch": 0.7420749279538905, + "grad_norm": 2.672954559326172, + "learning_rate": 4.677015097715994e-08, + "logits/chosen": -1.4801313877105713, + "logits/rejected": -1.4725544452667236, + "logps/chosen": -43.47493362426758, + "logps/rejected": -46.784507751464844, + "loss": 0.6917, + "rewards/accuracies": 0.53125, + "rewards/chosen": 0.003656183136627078, + "rewards/margins": 0.0029149625916033983, + "rewards/rejected": 0.0007412207196466625, + "step": 1030 + }, + { + "epoch": 0.7492795389048992, + "grad_norm": 2.4194390773773193, + "learning_rate": 4.666633850812825e-08, + "logits/chosen": -1.5239073038101196, + "logits/rejected": -1.507598638534546, + "logps/chosen": -46.25876998901367, + "logps/rejected": -48.20856475830078, + "loss": 0.6921, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.0025006632786244154, + "rewards/margins": 0.0021574136335402727, + "rewards/rejected": 0.0003432496450841427, + "step": 1040 + }, + { + "epoch": 0.7564841498559077, + "grad_norm": 2.3646445274353027, + "learning_rate": 4.656100298496439e-08, + "logits/chosen": -1.4333226680755615, + "logits/rejected": -1.4197046756744385, + "logps/chosen": -41.23944091796875, + "logps/rejected": -44.33473205566406, + "loss": 0.6916, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0036236427258700132, + "rewards/margins": 0.0031666383147239685, + "rewards/rejected": 0.00045700446935370564, + "step": 1050 + }, + { + "epoch": 0.7636887608069164, + "grad_norm": 3.0884833335876465, + "learning_rate": 4.6454151812320715e-08, + "logits/chosen": -1.5102076530456543, + "logits/rejected": -1.484427809715271, + "logps/chosen": -47.206207275390625, + "logps/rejected": -48.68370819091797, + "loss": 0.6917, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.003298333380371332, + "rewards/margins": 0.0030018885154277086, + "rewards/rejected": 0.00029644512687809765, + "step": 1060 + }, + { + "epoch": 0.770893371757925, + "grad_norm": 3.838554859161377, + "learning_rate": 4.6345792501393434e-08, + "logits/chosen": -1.4992831945419312, + "logits/rejected": -1.4937461614608765, + "logps/chosen": -53.69269943237305, + "logps/rejected": -57.73991012573242, + "loss": 0.6916, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.00392124941572547, + "rewards/margins": 0.0031723701395094395, + "rewards/rejected": 0.0007488794508390129, + "step": 1070 + }, + { + "epoch": 0.7780979827089337, + "grad_norm": 3.0563292503356934, + "learning_rate": 4.6235932669394676e-08, + "logits/chosen": -1.5073121786117554, + "logits/rejected": -1.4990915060043335, + "logps/chosen": -48.08951950073242, + "logps/rejected": -51.09489822387695, + "loss": 0.6915, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.004118567798286676, + "rewards/margins": 0.0033183638006448746, + "rewards/rejected": 0.0008002036483958364, + "step": 1080 + }, + { + "epoch": 0.7853025936599424, + "grad_norm": 3.5673720836639404, + "learning_rate": 4.612458003901698e-08, + "logits/chosen": -1.5294950008392334, + "logits/rejected": -1.523989200592041, + "logps/chosen": -52.46628952026367, + "logps/rejected": -56.08174514770508, + "loss": 0.6913, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.0030290051363408566, + "rewards/margins": 0.0037631914019584656, + "rewards/rejected": -0.0007341863238252699, + "step": 1090 + }, + { + "epoch": 0.792507204610951, + "grad_norm": 3.1977949142456055, + "learning_rate": 4.6011742437890476e-08, + "logits/chosen": -1.538527011871338, + "logits/rejected": -1.5158568620681763, + "logps/chosen": -47.33205795288086, + "logps/rejected": -48.808815002441406, + "loss": 0.6919, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.0029173900838941336, + "rewards/margins": 0.002560637192800641, + "rewards/rejected": 0.0003567528910934925, + "step": 1100 + }, + { + "epoch": 0.7997118155619597, + "grad_norm": 2.2893710136413574, + "learning_rate": 4.589742779803259e-08, + "logits/chosen": -1.5475555658340454, + "logits/rejected": -1.5350109338760376, + "logps/chosen": -46.484588623046875, + "logps/rejected": -48.64214324951172, + "loss": 0.6919, + "rewards/accuracies": 0.53125, + "rewards/chosen": 0.0027208970859646797, + "rewards/margins": 0.002428521169349551, + "rewards/rejected": 0.0002923758584074676, + "step": 1110 + }, + { + "epoch": 0.8069164265129684, + "grad_norm": 2.6367037296295166, + "learning_rate": 4.5781644155290486e-08, + "logits/chosen": -1.4872174263000488, + "logits/rejected": -1.4771387577056885, + "logps/chosen": -45.5018424987793, + "logps/rejected": -46.52324676513672, + "loss": 0.6913, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004312233533710241, + "rewards/margins": 0.0036662842612713575, + "rewards/rejected": 0.0006459490396082401, + "step": 1120 + }, + { + "epoch": 0.8141210374639769, + "grad_norm": 2.842519521713257, + "learning_rate": 4.566439964877613e-08, + "logits/chosen": -1.5221550464630127, + "logits/rejected": -1.5159690380096436, + "logps/chosen": -43.44307327270508, + "logps/rejected": -45.21900177001953, + "loss": 0.6922, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": 0.002209798665717244, + "rewards/margins": 0.0018761273240670562, + "rewards/rejected": 0.00033367107971571386, + "step": 1130 + }, + { + "epoch": 0.8213256484149856, + "grad_norm": 2.909646511077881, + "learning_rate": 4.554570252029421e-08, + "logits/chosen": -1.5703493356704712, + "logits/rejected": -1.5615266561508179, + "logps/chosen": -46.792572021484375, + "logps/rejected": -49.12213134765625, + "loss": 0.691, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.004547302611172199, + "rewards/margins": 0.004351162351667881, + "rewards/rejected": 0.00019614025950431824, + "step": 1140 + }, + { + "epoch": 0.8285302593659942, + "grad_norm": 2.6748578548431396, + "learning_rate": 4.542556111376274e-08, + "logits/chosen": -1.5654969215393066, + "logits/rejected": -1.554164171218872, + "logps/chosen": -48.726966857910156, + "logps/rejected": -50.896202087402344, + "loss": 0.6917, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.003084657248109579, + "rewards/margins": 0.0029002639930695295, + "rewards/rejected": 0.00018439313862472773, + "step": 1150 + }, + { + "epoch": 0.8357348703170029, + "grad_norm": 3.0902979373931885, + "learning_rate": 4.5303983874626506e-08, + "logits/chosen": -1.5411643981933594, + "logits/rejected": -1.5292751789093018, + "logps/chosen": -50.65459060668945, + "logps/rejected": -51.3670654296875, + "loss": 0.6919, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0027450700290501118, + "rewards/margins": 0.0025953901931643486, + "rewards/rejected": 0.0001496800541644916, + "step": 1160 + }, + { + "epoch": 0.8429394812680115, + "grad_norm": 3.544069290161133, + "learning_rate": 4.518097934926339e-08, + "logits/chosen": -1.4598362445831299, + "logits/rejected": -1.4344513416290283, + "logps/chosen": -46.78550338745117, + "logps/rejected": -46.877071380615234, + "loss": 0.6916, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0031762472353875637, + "rewards/margins": 0.0031154484022408724, + "rewards/rejected": 6.079913509893231e-05, + "step": 1170 + }, + { + "epoch": 0.8501440922190202, + "grad_norm": 3.858111619949341, + "learning_rate": 4.505655618438363e-08, + "logits/chosen": -1.424443244934082, + "logits/rejected": -1.4106619358062744, + "logps/chosen": -48.665103912353516, + "logps/rejected": -49.68077850341797, + "loss": 0.6917, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.0031317579559981823, + "rewards/margins": 0.002937522018328309, + "rewards/rejected": 0.00019423609774094075, + "step": 1180 + }, + { + "epoch": 0.8573487031700289, + "grad_norm": 2.923030376434326, + "learning_rate": 4.4930723126421945e-08, + "logits/chosen": -1.589641809463501, + "logits/rejected": -1.566506266593933, + "logps/chosen": -49.06594467163086, + "logps/rejected": -50.39019012451172, + "loss": 0.6915, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.003074865322560072, + "rewards/margins": 0.0032937501091510057, + "rewards/rejected": -0.00021888469927944243, + "step": 1190 + }, + { + "epoch": 0.8645533141210374, + "grad_norm": 3.2700655460357666, + "learning_rate": 4.48034890209227e-08, + "logits/chosen": -1.4649052619934082, + "logits/rejected": -1.4451799392700195, + "logps/chosen": -51.660499572753906, + "logps/rejected": -53.60346221923828, + "loss": 0.6915, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.0028472330886870623, + "rewards/margins": 0.003242159727960825, + "rewards/rejected": -0.0003949264937546104, + "step": 1200 + }, + { + "epoch": 0.8717579250720461, + "grad_norm": 2.6366593837738037, + "learning_rate": 4.4674862811918155e-08, + "logits/chosen": -1.4467828273773193, + "logits/rejected": -1.4440934658050537, + "logps/chosen": -43.3638801574707, + "logps/rejected": -46.49591064453125, + "loss": 0.6916, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0036537572741508484, + "rewards/margins": 0.0032189779449254274, + "rewards/rejected": 0.00043477994040586054, + "step": 1210 + }, + { + "epoch": 0.8789625360230547, + "grad_norm": 3.452131748199463, + "learning_rate": 4.454485354129966e-08, + "logits/chosen": -1.4948376417160034, + "logits/rejected": -1.4891588687896729, + "logps/chosen": -46.58236312866211, + "logps/rejected": -50.1088981628418, + "loss": 0.6912, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.003989598713815212, + "rewards/margins": 0.0038787820376455784, + "rewards/rejected": 0.00011081698175985366, + "step": 1220 + }, + { + "epoch": 0.8861671469740634, + "grad_norm": 2.979247570037842, + "learning_rate": 4.4413470348182124e-08, + "logits/chosen": -1.446300745010376, + "logits/rejected": -1.4229693412780762, + "logps/chosen": -48.74427032470703, + "logps/rejected": -50.7912483215332, + "loss": 0.6913, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.0037915543653070927, + "rewards/margins": 0.003707052441313863, + "rewards/rejected": 8.450145833194256e-05, + "step": 1230 + }, + { + "epoch": 0.8933717579250721, + "grad_norm": 3.6042563915252686, + "learning_rate": 4.42807224682615e-08, + "logits/chosen": -1.5023893117904663, + "logits/rejected": -1.4898313283920288, + "logps/chosen": -42.887596130371094, + "logps/rejected": -45.985328674316406, + "loss": 0.6909, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.0037057988811284304, + "rewards/margins": 0.0045966231264173985, + "rewards/rejected": -0.0008908241870813072, + "step": 1240 + }, + { + "epoch": 0.9005763688760807, + "grad_norm": 2.529266595840454, + "learning_rate": 4.4146619233165604e-08, + "logits/chosen": -1.5505568981170654, + "logits/rejected": -1.5460566282272339, + "logps/chosen": -50.64966583251953, + "logps/rejected": -54.0062370300293, + "loss": 0.6917, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.003028963692486286, + "rewards/margins": 0.002853470155969262, + "rewards/rejected": 0.00017549384210724384, + "step": 1250 + }, + { + "epoch": 0.9077809798270894, + "grad_norm": 3.1239147186279297, + "learning_rate": 4.4011170069798126e-08, + "logits/chosen": -1.505274772644043, + "logits/rejected": -1.5213569402694702, + "logps/chosen": -46.48722457885742, + "logps/rejected": -53.446556091308594, + "loss": 0.6916, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.002888137474656105, + "rewards/margins": 0.0031193068716675043, + "rewards/rejected": -0.00023116909142117947, + "step": 1260 + }, + { + "epoch": 0.9149855907780979, + "grad_norm": 3.1936755180358887, + "learning_rate": 4.387438449967594e-08, + "logits/chosen": -1.4545634984970093, + "logits/rejected": -1.4413819313049316, + "logps/chosen": -45.36618423461914, + "logps/rejected": -47.985939025878906, + "loss": 0.6905, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.004917544312775135, + "rewards/margins": 0.005304847843945026, + "rewards/rejected": -0.0003873028326779604, + "step": 1270 + }, + { + "epoch": 0.9221902017291066, + "grad_norm": 3.4427430629730225, + "learning_rate": 4.373627213825983e-08, + "logits/chosen": -1.6089175939559937, + "logits/rejected": -1.598972201347351, + "logps/chosen": -46.191490173339844, + "logps/rejected": -49.68521499633789, + "loss": 0.6906, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": 0.004848025739192963, + "rewards/margins": 0.0052161673083901405, + "rewards/rejected": -0.00036814124905504286, + "step": 1280 + }, + { + "epoch": 0.9293948126801153, + "grad_norm": 2.483668088912964, + "learning_rate": 4.359684269427848e-08, + "logits/chosen": -1.5662097930908203, + "logits/rejected": -1.5633054971694946, + "logps/chosen": -45.598777770996094, + "logps/rejected": -49.13982009887695, + "loss": 0.691, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.00441353302448988, + "rewards/margins": 0.00428149476647377, + "rewards/rejected": 0.00013203815615270287, + "step": 1290 + }, + { + "epoch": 0.9365994236311239, + "grad_norm": 3.012500286102295, + "learning_rate": 4.34561059690461e-08, + "logits/chosen": -1.6093149185180664, + "logits/rejected": -1.6080818176269531, + "logps/chosen": -47.28533172607422, + "logps/rejected": -48.898536682128906, + "loss": 0.6922, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.002217743080109358, + "rewards/margins": 0.001927342265844345, + "rewards/rejected": 0.0002904009015765041, + "step": 1300 + }, + { + "epoch": 0.9438040345821326, + "grad_norm": 2.672034740447998, + "learning_rate": 4.3314071855773314e-08, + "logits/chosen": -1.5703166723251343, + "logits/rejected": -1.5722987651824951, + "logps/chosen": -41.89187240600586, + "logps/rejected": -45.08455276489258, + "loss": 0.6914, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0034990236163139343, + "rewards/margins": 0.0036229409743100405, + "rewards/rejected": -0.00012391725613269955, + "step": 1310 + }, + { + "epoch": 0.9510086455331412, + "grad_norm": 3.103219985961914, + "learning_rate": 4.3170750338871806e-08, + "logits/chosen": -1.5061099529266357, + "logits/rejected": -1.491026759147644, + "logps/chosen": -46.509437561035156, + "logps/rejected": -49.67821502685547, + "loss": 0.6909, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.004872228484600782, + "rewards/margins": 0.004575548693537712, + "rewards/rejected": 0.0002966797037515789, + "step": 1320 + }, + { + "epoch": 0.9582132564841499, + "grad_norm": 3.005204677581787, + "learning_rate": 4.3026151493252414e-08, + "logits/chosen": -1.5492498874664307, + "logits/rejected": -1.5279552936553955, + "logps/chosen": -51.47428512573242, + "logps/rejected": -52.905067443847656, + "loss": 0.6908, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004221352748572826, + "rewards/margins": 0.0048477440141141415, + "rewards/rejected": -0.0006263910909183323, + "step": 1330 + }, + { + "epoch": 0.9654178674351584, + "grad_norm": 3.5376358032226562, + "learning_rate": 4.2880285483616895e-08, + "logits/chosen": -1.533168077468872, + "logits/rejected": -1.5301328897476196, + "logps/chosen": -45.71091842651367, + "logps/rejected": -49.009521484375, + "loss": 0.6912, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.003885247278958559, + "rewards/margins": 0.003925333730876446, + "rewards/rejected": -4.0086473745759577e-05, + "step": 1340 + }, + { + "epoch": 0.9726224783861671, + "grad_norm": 2.741413116455078, + "learning_rate": 4.273316256374342e-08, + "logits/chosen": -1.403411865234375, + "logits/rejected": -1.3984428644180298, + "logps/chosen": -52.229103088378906, + "logps/rejected": -53.140968322753906, + "loss": 0.6911, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0044440277852118015, + "rewards/margins": 0.004066800232976675, + "rewards/rejected": 0.00037722705747000873, + "step": 1350 + }, + { + "epoch": 0.9798270893371758, + "grad_norm": 3.3762712478637695, + "learning_rate": 4.258479307576576e-08, + "logits/chosen": -1.5002410411834717, + "logits/rejected": -1.495216727256775, + "logps/chosen": -43.78204345703125, + "logps/rejected": -45.711483001708984, + "loss": 0.6903, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.005402544513344765, + "rewards/margins": 0.005806138273328543, + "rewards/rejected": -0.00040359393460676074, + "step": 1360 + }, + { + "epoch": 0.9870317002881844, + "grad_norm": 2.7428503036499023, + "learning_rate": 4.243518744944626e-08, + "logits/chosen": -1.5039384365081787, + "logits/rejected": -1.5001270771026611, + "logps/chosen": -43.2661247253418, + "logps/rejected": -47.12836456298828, + "loss": 0.6908, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.004102085717022419, + "rewards/margins": 0.004668028559535742, + "rewards/rejected": -0.0005659434827975929, + "step": 1370 + }, + { + "epoch": 0.9942363112391931, + "grad_norm": 3.406386137008667, + "learning_rate": 4.22843562014427e-08, + "logits/chosen": -1.4495034217834473, + "logits/rejected": -1.4397344589233398, + "logps/chosen": -46.86280059814453, + "logps/rejected": -49.02347946166992, + "loss": 0.6917, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0041961586102843285, + "rewards/margins": 0.0028399438597261906, + "rewards/rejected": 0.001356214052066207, + "step": 1380 + }, + { + "epoch": 1.0014409221902016, + "grad_norm": 3.115910053253174, + "learning_rate": 4.2132309934569e-08, + "logits/chosen": -1.5670406818389893, + "logits/rejected": -1.5619332790374756, + "logps/chosen": -43.755001068115234, + "logps/rejected": -46.139137268066406, + "loss": 0.6912, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.005227426066994667, + "rewards/margins": 0.003935725893825293, + "rewards/rejected": 0.0012916993582621217, + "step": 1390 + }, + { + "epoch": 1.0086455331412103, + "grad_norm": 2.4783718585968018, + "learning_rate": 4.197905933704989e-08, + "logits/chosen": -1.4311118125915527, + "logits/rejected": -1.421370029449463, + "logps/chosen": -47.266395568847656, + "logps/rejected": -49.955780029296875, + "loss": 0.6897, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.005098997615277767, + "rewards/margins": 0.007058604154735804, + "rewards/rejected": -0.001959607470780611, + "step": 1400 + }, + { + "epoch": 1.015850144092219, + "grad_norm": 2.7212026119232178, + "learning_rate": 4.1824615181769577e-08, + "logits/chosen": -1.486352562904358, + "logits/rejected": -1.4931161403656006, + "logps/chosen": -43.8072509765625, + "logps/rejected": -47.75897216796875, + "loss": 0.6901, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.005026941187679768, + "rewards/margins": 0.006201364565640688, + "rewards/rejected": -0.0011744231451302767, + "step": 1410 + }, + { + "epoch": 1.0230547550432276, + "grad_norm": 3.118551254272461, + "learning_rate": 4.1668988325514434e-08, + "logits/chosen": -1.524102807044983, + "logits/rejected": -1.5137133598327637, + "logps/chosen": -49.41641616821289, + "logps/rejected": -52.12779998779297, + "loss": 0.6899, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.004953524563461542, + "rewards/margins": 0.00664835050702095, + "rewards/rejected": -0.0016948258271440864, + "step": 1420 + }, + { + "epoch": 1.0302593659942363, + "grad_norm": 3.0052857398986816, + "learning_rate": 4.1512189708209844e-08, + "logits/chosen": -1.5740153789520264, + "logits/rejected": -1.5645209550857544, + "logps/chosen": -38.22710418701172, + "logps/rejected": -39.43300247192383, + "loss": 0.6906, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.0053091454319655895, + "rewards/margins": 0.005104938056319952, + "rewards/rejected": 0.00020420753571670502, + "step": 1430 + }, + { + "epoch": 1.037463976945245, + "grad_norm": 3.6204934120178223, + "learning_rate": 4.1354230352151143e-08, + "logits/chosen": -1.5043399333953857, + "logits/rejected": -1.4913597106933594, + "logps/chosen": -56.45264434814453, + "logps/rejected": -56.661643981933594, + "loss": 0.6907, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.00407161470502615, + "rewards/margins": 0.004998006857931614, + "rewards/rejected": -0.0009263925021514297, + "step": 1440 + }, + { + "epoch": 1.0446685878962536, + "grad_norm": 2.5718061923980713, + "learning_rate": 4.119512136122882e-08, + "logits/chosen": -1.6087749004364014, + "logits/rejected": -1.621289610862732, + "logps/chosen": -42.30548095703125, + "logps/rejected": -48.45243453979492, + "loss": 0.6898, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.004055247642099857, + "rewards/margins": 0.0067529031075537205, + "rewards/rejected": -0.002697654766961932, + "step": 1450 + }, + { + "epoch": 1.0518731988472623, + "grad_norm": 3.4756808280944824, + "learning_rate": 4.103487392014795e-08, + "logits/chosen": -1.4752939939498901, + "logits/rejected": -1.4567186832427979, + "logps/chosen": -46.37599563598633, + "logps/rejected": -51.025108337402344, + "loss": 0.6888, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": 0.005677501205354929, + "rewards/margins": 0.008715528063476086, + "rewards/rejected": -0.0030380270909518003, + "step": 1460 + }, + { + "epoch": 1.059077809798271, + "grad_norm": 2.909818649291992, + "learning_rate": 4.087349929364192e-08, + "logits/chosen": -1.565161108970642, + "logits/rejected": -1.5442824363708496, + "logps/chosen": -42.54533386230469, + "logps/rejected": -45.89342498779297, + "loss": 0.6895, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.00476106209680438, + "rewards/margins": 0.007298412267118692, + "rewards/rejected": -0.002537350170314312, + "step": 1470 + }, + { + "epoch": 1.0662824207492796, + "grad_norm": 2.4459564685821533, + "learning_rate": 4.0711008825680645e-08, + "logits/chosen": -1.504279375076294, + "logits/rejected": -1.4850876331329346, + "logps/chosen": -47.31806945800781, + "logps/rejected": -50.19545364379883, + "loss": 0.6903, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004515786189585924, + "rewards/margins": 0.005739896558225155, + "rewards/rejected": -0.0012241104850545526, + "step": 1480 + }, + { + "epoch": 1.0734870317002883, + "grad_norm": 3.5417470932006836, + "learning_rate": 4.054741393867306e-08, + "logits/chosen": -1.4754365682601929, + "logits/rejected": -1.464170217514038, + "logps/chosen": -54.07590866088867, + "logps/rejected": -55.700416564941406, + "loss": 0.6903, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0043455869890749454, + "rewards/margins": 0.005824446678161621, + "rewards/rejected": -0.0014788598055019975, + "step": 1490 + }, + { + "epoch": 1.080691642651297, + "grad_norm": 2.9604134559631348, + "learning_rate": 4.038272613266419e-08, + "logits/chosen": -1.5457851886749268, + "logits/rejected": -1.521743655204773, + "logps/chosen": -44.90116882324219, + "logps/rejected": -47.38478088378906, + "loss": 0.6903, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.004831579513847828, + "rewards/margins": 0.005809755530208349, + "rewards/rejected": -0.0009781757835298777, + "step": 1500 + }, + { + "epoch": 1.0878962536023056, + "grad_norm": 3.386324882507324, + "learning_rate": 4.0216956984526784e-08, + "logits/chosen": -1.5541696548461914, + "logits/rejected": -1.5498359203338623, + "logps/chosen": -42.88204574584961, + "logps/rejected": -45.56418228149414, + "loss": 0.6896, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0052653830498456955, + "rewards/margins": 0.00714817363768816, + "rewards/rejected": -0.0018827903550118208, + "step": 1510 + }, + { + "epoch": 1.0951008645533142, + "grad_norm": 3.1095175743103027, + "learning_rate": 4.0050118147147446e-08, + "logits/chosen": -1.5181224346160889, + "logits/rejected": -1.5101962089538574, + "logps/chosen": -53.42927169799805, + "logps/rejected": -52.1214714050293, + "loss": 0.6922, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": 0.002941600512713194, + "rewards/margins": 0.00190871418453753, + "rewards/rejected": 0.0010328865610063076, + "step": 1520 + }, + { + "epoch": 1.1023054755043227, + "grad_norm": 3.0901896953582764, + "learning_rate": 3.988222134860755e-08, + "logits/chosen": -1.5638874769210815, + "logits/rejected": -1.5509564876556396, + "logps/chosen": -47.26706314086914, + "logps/rejected": -51.6654167175293, + "loss": 0.6898, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.005253427661955357, + "rewards/margins": 0.00679327268153429, + "rewards/rejected": -0.0015398439718410373, + "step": 1530 + }, + { + "epoch": 1.1095100864553313, + "grad_norm": 3.111027717590332, + "learning_rate": 3.9713278391358724e-08, + "logits/chosen": -1.5749475955963135, + "logits/rejected": -1.5632776021957397, + "logps/chosen": -45.95053482055664, + "logps/rejected": -49.204917907714844, + "loss": 0.6903, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.005047272425144911, + "rewards/margins": 0.005783633328974247, + "rewards/rejected": -0.0007363610784523189, + "step": 1540 + }, + { + "epoch": 1.11671469740634, + "grad_norm": 2.466688632965088, + "learning_rate": 3.954330115139328e-08, + "logits/chosen": -1.5432218313217163, + "logits/rejected": -1.5328999757766724, + "logps/chosen": -46.747894287109375, + "logps/rejected": -48.846595764160156, + "loss": 0.6898, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.0048002442345023155, + "rewards/margins": 0.006832278333604336, + "rewards/rejected": -0.0020320338662713766, + "step": 1550 + }, + { + "epoch": 1.1239193083573487, + "grad_norm": 4.091006278991699, + "learning_rate": 3.937230157740931e-08, + "logits/chosen": -1.591812252998352, + "logits/rejected": -1.5731406211853027, + "logps/chosen": -47.905906677246094, + "logps/rejected": -51.46516799926758, + "loss": 0.6893, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0062894513830542564, + "rewards/margins": 0.007767542265355587, + "rewards/rejected": -0.001478090649470687, + "step": 1560 + }, + { + "epoch": 1.1311239193083573, + "grad_norm": 2.356121778488159, + "learning_rate": 3.920029168997077e-08, + "logits/chosen": -1.5560190677642822, + "logits/rejected": -1.5427652597427368, + "logps/chosen": -48.724090576171875, + "logps/rejected": -51.52899932861328, + "loss": 0.6903, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.004863058216869831, + "rewards/margins": 0.005728754214942455, + "rewards/rejected": -0.000865696172695607, + "step": 1570 + }, + { + "epoch": 1.138328530259366, + "grad_norm": 3.9722375869750977, + "learning_rate": 3.9027283580662476e-08, + "logits/chosen": -1.5196526050567627, + "logits/rejected": -1.5073282718658447, + "logps/chosen": -49.61786651611328, + "logps/rejected": -52.77516555786133, + "loss": 0.6888, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.005080161150544882, + "rewards/margins": 0.008876914158463478, + "rewards/rejected": -0.003796751843765378, + "step": 1580 + }, + { + "epoch": 1.1455331412103746, + "grad_norm": 3.991994619369507, + "learning_rate": 3.885328941124014e-08, + "logits/chosen": -1.5015205144882202, + "logits/rejected": -1.488493800163269, + "logps/chosen": -45.93663787841797, + "logps/rejected": -50.631431579589844, + "loss": 0.6896, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.005211810581386089, + "rewards/margins": 0.0071194409392774105, + "rewards/rejected": -0.0019076305907219648, + "step": 1590 + }, + { + "epoch": 1.1527377521613833, + "grad_norm": 3.034956216812134, + "learning_rate": 3.867832141277539e-08, + "logits/chosen": -1.5487406253814697, + "logits/rejected": -1.5294761657714844, + "logps/chosen": -49.104705810546875, + "logps/rejected": -51.280723571777344, + "loss": 0.69, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.004329115618020296, + "rewards/margins": 0.006378514226526022, + "rewards/rejected": -0.002049399074167013, + "step": 1600 + }, + { + "epoch": 1.159942363112392, + "grad_norm": 3.430689573287964, + "learning_rate": 3.850239188479606e-08, + "logits/chosen": -1.462114930152893, + "logits/rejected": -1.4583441019058228, + "logps/chosen": -46.75814437866211, + "logps/rejected": -49.108917236328125, + "loss": 0.6901, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.004666672088205814, + "rewards/margins": 0.006269653793424368, + "rewards/rejected": -0.0016029814723879099, + "step": 1610 + }, + { + "epoch": 1.1671469740634006, + "grad_norm": 3.5557363033294678, + "learning_rate": 3.832551319442151e-08, + "logits/chosen": -1.5857681035995483, + "logits/rejected": -1.5824248790740967, + "logps/chosen": -49.61214065551758, + "logps/rejected": -53.804893493652344, + "loss": 0.69, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.005068537779152393, + "rewards/margins": 0.006406673230230808, + "rewards/rejected": -0.0013381352182477713, + "step": 1620 + }, + { + "epoch": 1.1743515850144093, + "grad_norm": 4.248888969421387, + "learning_rate": 3.81476977754933e-08, + "logits/chosen": -1.399951696395874, + "logits/rejected": -1.3853706121444702, + "logps/chosen": -51.313499450683594, + "logps/rejected": -50.658668518066406, + "loss": 0.6902, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.004184350371360779, + "rewards/margins": 0.0060124825686216354, + "rewards/rejected": -0.0018281325465068221, + "step": 1630 + }, + { + "epoch": 1.181556195965418, + "grad_norm": 2.400538206100464, + "learning_rate": 3.796895812770114e-08, + "logits/chosen": -1.5025560855865479, + "logits/rejected": -1.4927390813827515, + "logps/chosen": -45.765350341796875, + "logps/rejected": -47.244354248046875, + "loss": 0.6894, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.006023012101650238, + "rewards/margins": 0.0076335109770298, + "rewards/rejected": -0.0016104992246255279, + "step": 1640 + }, + { + "epoch": 1.1887608069164266, + "grad_norm": 3.0615625381469727, + "learning_rate": 3.7789306815704216e-08, + "logits/chosen": -1.527266025543213, + "logits/rejected": -1.516747236251831, + "logps/chosen": -40.9234733581543, + "logps/rejected": -42.032447814941406, + "loss": 0.6907, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.0036138053983449936, + "rewards/margins": 0.004900342784821987, + "rewards/rejected": -0.00128653715364635, + "step": 1650 + }, + { + "epoch": 1.195965417867435, + "grad_norm": 2.6502482891082764, + "learning_rate": 3.760875646824795e-08, + "logits/chosen": -1.3961491584777832, + "logits/rejected": -1.397456407546997, + "logps/chosen": -46.144996643066406, + "logps/rejected": -48.4418830871582, + "loss": 0.6897, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.003492361633107066, + "rewards/margins": 0.007100371178239584, + "rewards/rejected": -0.003608010010793805, + "step": 1660 + }, + { + "epoch": 1.2031700288184437, + "grad_norm": 3.6749885082244873, + "learning_rate": 3.742731977727623e-08, + "logits/chosen": -1.5396411418914795, + "logits/rejected": -1.5324022769927979, + "logps/chosen": -45.20623016357422, + "logps/rejected": -49.124549865722656, + "loss": 0.6895, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.006122785620391369, + "rewards/margins": 0.007418873719871044, + "rewards/rejected": -0.0012960887979716063, + "step": 1670 + }, + { + "epoch": 1.2103746397694524, + "grad_norm": 3.708371877670288, + "learning_rate": 3.7245009497039244e-08, + "logits/chosen": -1.4357057809829712, + "logits/rejected": -1.420304298400879, + "logps/chosen": -45.42781066894531, + "logps/rejected": -49.5192756652832, + "loss": 0.6891, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0038997051306068897, + "rewards/margins": 0.008227763697504997, + "rewards/rejected": -0.0043280585668981075, + "step": 1680 + }, + { + "epoch": 1.217579250720461, + "grad_norm": 2.6539306640625, + "learning_rate": 3.7061838443196886e-08, + "logits/chosen": -1.511749029159546, + "logits/rejected": -1.502352237701416, + "logps/chosen": -50.04130172729492, + "logps/rejected": -52.17155075073242, + "loss": 0.6885, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.006058938335627317, + "rewards/margins": 0.00949786975979805, + "rewards/rejected": -0.003438931657001376, + "step": 1690 + }, + { + "epoch": 1.2247838616714697, + "grad_norm": 3.0108015537261963, + "learning_rate": 3.68778194919179e-08, + "logits/chosen": -1.472414255142212, + "logits/rejected": -1.4666353464126587, + "logps/chosen": -50.08484649658203, + "logps/rejected": -53.3315315246582, + "loss": 0.6878, + "rewards/accuracies": 0.6875, + "rewards/chosen": 0.008036890998482704, + "rewards/margins": 0.010940475389361382, + "rewards/rejected": -0.0029035855550318956, + "step": 1700 + }, + { + "epoch": 1.2319884726224783, + "grad_norm": 3.6009788513183594, + "learning_rate": 3.66929655789747e-08, + "logits/chosen": -1.5710514783859253, + "logits/rejected": -1.5530909299850464, + "logps/chosen": -41.97422790527344, + "logps/rejected": -46.53120040893555, + "loss": 0.689, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.005440887995064259, + "rewards/margins": 0.008487561717629433, + "rewards/rejected": -0.0030466741882264614, + "step": 1710 + }, + { + "epoch": 1.239193083573487, + "grad_norm": 2.323777437210083, + "learning_rate": 3.6507289698834064e-08, + "logits/chosen": -1.4719207286834717, + "logits/rejected": -1.455365538597107, + "logps/chosen": -43.55077362060547, + "logps/rejected": -46.05921173095703, + "loss": 0.6891, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.005178916268050671, + "rewards/margins": 0.008271681144833565, + "rewards/rejected": -0.003092765109613538, + "step": 1720 + }, + { + "epoch": 1.2463976945244957, + "grad_norm": 4.027398109436035, + "learning_rate": 3.6320804903743684e-08, + "logits/chosen": -1.5161298513412476, + "logits/rejected": -1.5111939907073975, + "logps/chosen": -45.40093231201172, + "logps/rejected": -49.17858123779297, + "loss": 0.6887, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.003557295771315694, + "rewards/margins": 0.009094839915633202, + "rewards/rejected": -0.005537544842809439, + "step": 1730 + }, + { + "epoch": 1.2536023054755043, + "grad_norm": 2.655517816543579, + "learning_rate": 3.61335243028146e-08, + "logits/chosen": -1.4958670139312744, + "logits/rejected": -1.489793062210083, + "logps/chosen": -48.872459411621094, + "logps/rejected": -51.61071014404297, + "loss": 0.6891, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.004584385547786951, + "rewards/margins": 0.008167969062924385, + "rewards/rejected": -0.003583582118153572, + "step": 1740 + }, + { + "epoch": 1.260806916426513, + "grad_norm": 3.2628414630889893, + "learning_rate": 3.5945461061099736e-08, + "logits/chosen": -1.4390538930892944, + "logits/rejected": -1.4079252481460571, + "logps/chosen": -50.76530075073242, + "logps/rejected": -49.67983627319336, + "loss": 0.6872, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.006600284017622471, + "rewards/margins": 0.012185483239591122, + "rewards/rejected": -0.005585200153291225, + "step": 1750 + }, + { + "epoch": 1.2680115273775217, + "grad_norm": 2.9704713821411133, + "learning_rate": 3.5756628398668446e-08, + "logits/chosen": -1.5589011907577515, + "logits/rejected": -1.5578845739364624, + "logps/chosen": -51.247100830078125, + "logps/rejected": -53.67809295654297, + "loss": 0.6887, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0037506825756281614, + "rewards/margins": 0.009181154891848564, + "rewards/rejected": -0.005430473946034908, + "step": 1760 + }, + { + "epoch": 1.2752161383285303, + "grad_norm": 2.6625916957855225, + "learning_rate": 3.556703958967716e-08, + "logits/chosen": -1.5575711727142334, + "logits/rejected": -1.5438311100006104, + "logps/chosen": -44.35518264770508, + "logps/rejected": -47.987945556640625, + "loss": 0.6896, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.003589153289794922, + "rewards/margins": 0.007138341665267944, + "rewards/rejected": -0.0035491890739649534, + "step": 1770 + }, + { + "epoch": 1.282420749279539, + "grad_norm": 4.012426853179932, + "learning_rate": 3.5376707961436297e-08, + "logits/chosen": -1.5312955379486084, + "logits/rejected": -1.5150017738342285, + "logps/chosen": -53.351661682128906, + "logps/rejected": -53.443603515625, + "loss": 0.6907, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": 0.004278816748410463, + "rewards/margins": 0.004905478097498417, + "rewards/rejected": -0.0006266612326726317, + "step": 1780 + }, + { + "epoch": 1.2896253602305476, + "grad_norm": 2.424056053161621, + "learning_rate": 3.51856468934734e-08, + "logits/chosen": -1.4922749996185303, + "logits/rejected": -1.4954168796539307, + "logps/chosen": -46.36159896850586, + "logps/rejected": -48.64609146118164, + "loss": 0.6913, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.004488944076001644, + "rewards/margins": 0.0038774623535573483, + "rewards/rejected": 0.0006114812567830086, + "step": 1790 + }, + { + "epoch": 1.2968299711815563, + "grad_norm": 3.357475519180298, + "learning_rate": 3.499386981659262e-08, + "logits/chosen": -1.5789316892623901, + "logits/rejected": -1.5703160762786865, + "logps/chosen": -45.465980529785156, + "logps/rejected": -51.676979064941406, + "loss": 0.689, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.006388810463249683, + "rewards/margins": 0.00840458832681179, + "rewards/rejected": -0.0020157776307314634, + "step": 1800 + }, + { + "epoch": 1.304034582132565, + "grad_norm": 2.5491249561309814, + "learning_rate": 3.480139021193057e-08, + "logits/chosen": -1.4629701375961304, + "logits/rejected": -1.4627655744552612, + "logps/chosen": -46.493797302246094, + "logps/rejected": -49.93988800048828, + "loss": 0.6896, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.003896316047757864, + "rewards/margins": 0.007192063145339489, + "rewards/rejected": -0.003295747097581625, + "step": 1810 + }, + { + "epoch": 1.3112391930835736, + "grad_norm": 4.076901912689209, + "learning_rate": 3.4608221610008666e-08, + "logits/chosen": -1.5544006824493408, + "logits/rejected": -1.5443475246429443, + "logps/chosen": -40.67546081542969, + "logps/rejected": -45.340274810791016, + "loss": 0.6877, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0057144188322126865, + "rewards/margins": 0.011151134967803955, + "rewards/rejected": -0.005436715669929981, + "step": 1820 + }, + { + "epoch": 1.318443804034582, + "grad_norm": 2.311035394668579, + "learning_rate": 3.4414377589782e-08, + "logits/chosen": -1.4895527362823486, + "logits/rejected": -1.4889047145843506, + "logps/chosen": -44.287559509277344, + "logps/rejected": -46.684104919433594, + "loss": 0.6891, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.003038702066987753, + "rewards/margins": 0.00834103673696518, + "rewards/rejected": -0.005302335135638714, + "step": 1830 + }, + { + "epoch": 1.3256484149855907, + "grad_norm": 2.245774745941162, + "learning_rate": 3.4219871777684745e-08, + "logits/chosen": -1.5047295093536377, + "logits/rejected": -1.4803383350372314, + "logps/chosen": -48.24694061279297, + "logps/rejected": -49.6685791015625, + "loss": 0.6888, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.004406126216053963, + "rewards/margins": 0.008816715329885483, + "rewards/rejected": -0.00441058911383152, + "step": 1840 + }, + { + "epoch": 1.3328530259365994, + "grad_norm": 3.1291747093200684, + "learning_rate": 3.4024717846672364e-08, + "logits/chosen": -1.5541332960128784, + "logits/rejected": -1.540783166885376, + "logps/chosen": -43.84444808959961, + "logps/rejected": -47.0974235534668, + "loss": 0.6885, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.003276639385148883, + "rewards/margins": 0.009581932798027992, + "rewards/rejected": -0.006305294577032328, + "step": 1850 + }, + { + "epoch": 1.340057636887608, + "grad_norm": 3.2411158084869385, + "learning_rate": 3.382892951526036e-08, + "logits/chosen": -1.5086630582809448, + "logits/rejected": -1.498652696609497, + "logps/chosen": -48.55314636230469, + "logps/rejected": -53.514312744140625, + "loss": 0.688, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004771741572767496, + "rewards/margins": 0.010389812290668488, + "rewards/rejected": -0.005618072114884853, + "step": 1860 + }, + { + "epoch": 1.3472622478386167, + "grad_norm": 3.0738956928253174, + "learning_rate": 3.3632520546559974e-08, + "logits/chosen": -1.477526307106018, + "logits/rejected": -1.4502986669540405, + "logps/chosen": -42.176910400390625, + "logps/rejected": -46.28841781616211, + "loss": 0.6882, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.005257411859929562, + "rewards/margins": 0.009985310956835747, + "rewards/rejected": -0.004727899096906185, + "step": 1870 + }, + { + "epoch": 1.3544668587896254, + "grad_norm": 3.4176902770996094, + "learning_rate": 3.34355047473107e-08, + "logits/chosen": -1.509404182434082, + "logits/rejected": -1.4928141832351685, + "logps/chosen": -49.149658203125, + "logps/rejected": -50.33115768432617, + "loss": 0.6892, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.0034352585207670927, + "rewards/margins": 0.008046688511967659, + "rewards/rejected": -0.004611429758369923, + "step": 1880 + }, + { + "epoch": 1.361671469740634, + "grad_norm": 3.2581231594085693, + "learning_rate": 3.323789596690971e-08, + "logits/chosen": -1.4438790082931519, + "logits/rejected": -1.4406137466430664, + "logps/chosen": -46.02549362182617, + "logps/rejected": -50.36063766479492, + "loss": 0.6886, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.004354453645646572, + "rewards/margins": 0.009347590617835522, + "rewards/rejected": -0.004993138834834099, + "step": 1890 + }, + { + "epoch": 1.3688760806916427, + "grad_norm": 2.03442645072937, + "learning_rate": 3.303970809643828e-08, + "logits/chosen": -1.5256521701812744, + "logits/rejected": -1.5279293060302734, + "logps/chosen": -45.331443786621094, + "logps/rejected": -49.03801727294922, + "loss": 0.6891, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00544341467320919, + "rewards/margins": 0.00833254773169756, + "rewards/rejected": -0.0028891332913190126, + "step": 1900 + }, + { + "epoch": 1.3760806916426513, + "grad_norm": 3.0210492610931396, + "learning_rate": 3.2840955067685356e-08, + "logits/chosen": -1.5634491443634033, + "logits/rejected": -1.5632786750793457, + "logps/chosen": -46.0203742980957, + "logps/rejected": -50.457698822021484, + "loss": 0.6876, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.005104956682771444, + "rewards/margins": 0.011318376287817955, + "rewards/rejected": -0.006213418673723936, + "step": 1910 + }, + { + "epoch": 1.38328530259366, + "grad_norm": 2.749630928039551, + "learning_rate": 3.264165085216817e-08, + "logits/chosen": -1.5801355838775635, + "logits/rejected": -1.5726850032806396, + "logps/chosen": -38.53851318359375, + "logps/rejected": -43.82078170776367, + "loss": 0.6888, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.004419215954840183, + "rewards/margins": 0.008898451924324036, + "rewards/rejected": -0.004479236900806427, + "step": 1920 + }, + { + "epoch": 1.3904899135446687, + "grad_norm": 4.1981916427612305, + "learning_rate": 3.244180946015008e-08, + "logits/chosen": -1.444226861000061, + "logits/rejected": -1.4355494976043701, + "logps/chosen": -52.143287658691406, + "logps/rejected": -53.8681755065918, + "loss": 0.6898, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.004708580207079649, + "rewards/margins": 0.006951368413865566, + "rewards/rejected": -0.0022427875082939863, + "step": 1930 + }, + { + "epoch": 1.397694524495677, + "grad_norm": 2.518497943878174, + "learning_rate": 3.224144493965578e-08, + "logits/chosen": -1.5799140930175781, + "logits/rejected": -1.5780613422393799, + "logps/chosen": -43.635032653808594, + "logps/rejected": -45.74082565307617, + "loss": 0.6892, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.00375853362493217, + "rewards/margins": 0.008155545219779015, + "rewards/rejected": -0.004397011827677488, + "step": 1940 + }, + { + "epoch": 1.4048991354466858, + "grad_norm": 2.8173162937164307, + "learning_rate": 3.204057137548371e-08, + "logits/chosen": -1.5313746929168701, + "logits/rejected": -1.5233030319213867, + "logps/chosen": -43.7186164855957, + "logps/rejected": -47.32202911376953, + "loss": 0.6878, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.0038849213160574436, + "rewards/margins": 0.011041805148124695, + "rewards/rejected": -0.007156885229051113, + "step": 1950 + }, + { + "epoch": 1.4121037463976944, + "grad_norm": 3.7198755741119385, + "learning_rate": 3.183920288821597e-08, + "logits/chosen": -1.4899612665176392, + "logits/rejected": -1.4815582036972046, + "logps/chosen": -45.28757095336914, + "logps/rejected": -49.970062255859375, + "loss": 0.6872, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.004886112175881863, + "rewards/margins": 0.012144430540502071, + "rewards/rejected": -0.007258318364620209, + "step": 1960 + }, + { + "epoch": 1.419308357348703, + "grad_norm": 3.8075971603393555, + "learning_rate": 3.1637353633225735e-08, + "logits/chosen": -1.5398638248443604, + "logits/rejected": -1.529147744178772, + "logps/chosen": -41.222007751464844, + "logps/rejected": -45.725135803222656, + "loss": 0.6868, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.003631623461842537, + "rewards/margins": 0.013042435050010681, + "rewards/rejected": -0.009410811588168144, + "step": 1970 + }, + { + "epoch": 1.4265129682997117, + "grad_norm": 3.284583330154419, + "learning_rate": 3.143503779968213e-08, + "logits/chosen": -1.5069071054458618, + "logits/rejected": -1.5070630311965942, + "logps/chosen": -45.44756317138672, + "logps/rejected": -49.775611877441406, + "loss": 0.6891, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.0011150476057082415, + "rewards/margins": 0.008462509140372276, + "rewards/rejected": -0.00734746316447854, + "step": 1980 + }, + { + "epoch": 1.4337175792507204, + "grad_norm": 3.3477249145507812, + "learning_rate": 3.1232269609552875e-08, + "logits/chosen": -1.518206000328064, + "logits/rejected": -1.507889986038208, + "logps/chosen": -43.67546463012695, + "logps/rejected": -46.17768096923828, + "loss": 0.6887, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0041655306704342365, + "rewards/margins": 0.009150232188403606, + "rewards/rejected": -0.004984701983630657, + "step": 1990 + }, + { + "epoch": 1.440922190201729, + "grad_norm": 2.176100969314575, + "learning_rate": 3.102906331660444e-08, + "logits/chosen": -1.5566879510879517, + "logits/rejected": -1.5430500507354736, + "logps/chosen": -41.93559646606445, + "logps/rejected": -48.24177932739258, + "loss": 0.6867, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.005309706088155508, + "rewards/margins": 0.013114815577864647, + "rewards/rejected": -0.007805109955370426, + "step": 2000 + }, + { + "epoch": 1.4481268011527377, + "grad_norm": 3.1868534088134766, + "learning_rate": 3.082543320540015e-08, + "logits/chosen": -1.469954252243042, + "logits/rejected": -1.4548522233963013, + "logps/chosen": -43.87824249267578, + "logps/rejected": -47.534263610839844, + "loss": 0.6883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00238056224770844, + "rewards/margins": 0.009992104955017567, + "rewards/rejected": -0.007611544337123632, + "step": 2010 + }, + { + "epoch": 1.4553314121037464, + "grad_norm": 4.190840721130371, + "learning_rate": 3.062139359029599e-08, + "logits/chosen": -1.5576965808868408, + "logits/rejected": -1.5537203550338745, + "logps/chosen": -46.4661979675293, + "logps/rejected": -48.89842224121094, + "loss": 0.6883, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0034370776265859604, + "rewards/margins": 0.010012554004788399, + "rewards/rejected": -0.006575475446879864, + "step": 2020 + }, + { + "epoch": 1.462536023054755, + "grad_norm": 3.4175453186035156, + "learning_rate": 3.041695881443437e-08, + "logits/chosen": -1.5763541460037231, + "logits/rejected": -1.5677953958511353, + "logps/chosen": -46.359901428222656, + "logps/rejected": -50.328311920166016, + "loss": 0.6901, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.003387346165254712, + "rewards/margins": 0.006299160420894623, + "rewards/rejected": -0.0029118142556399107, + "step": 2030 + }, + { + "epoch": 1.4697406340057637, + "grad_norm": 4.04388427734375, + "learning_rate": 3.0212143248735886e-08, + "logits/chosen": -1.5315358638763428, + "logits/rejected": -1.5291774272918701, + "logps/chosen": -49.797996520996094, + "logps/rejected": -54.427490234375, + "loss": 0.6878, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.003973301034420729, + "rewards/margins": 0.011022644117474556, + "rewards/rejected": -0.007049343083053827, + "step": 2040 + }, + { + "epoch": 1.4769452449567724, + "grad_norm": 3.1518523693084717, + "learning_rate": 3.0006961290889077e-08, + "logits/chosen": -1.5215927362442017, + "logits/rejected": -1.496594786643982, + "logps/chosen": -50.674774169921875, + "logps/rejected": -53.18156051635742, + "loss": 0.6876, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.004915239289402962, + "rewards/margins": 0.011425209231674671, + "rewards/rejected": -0.0065099699422717094, + "step": 2050 + }, + { + "epoch": 1.484149855907781, + "grad_norm": 2.6612987518310547, + "learning_rate": 2.980142736433833e-08, + "logits/chosen": -1.547136902809143, + "logits/rejected": -1.5229747295379639, + "logps/chosen": -44.282020568847656, + "logps/rejected": -44.51871871948242, + "loss": 0.6885, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0016722548753023148, + "rewards/margins": 0.009514597244560719, + "rewards/rejected": -0.00784234143793583, + "step": 2060 + }, + { + "epoch": 1.4913544668587897, + "grad_norm": 4.097959518432617, + "learning_rate": 2.9595555917269997e-08, + "logits/chosen": -1.5567786693572998, + "logits/rejected": -1.529726266860962, + "logps/chosen": -51.577598571777344, + "logps/rejected": -53.3016471862793, + "loss": 0.6884, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.0018792494665831327, + "rewards/margins": 0.009681441821157932, + "rewards/rejected": -0.0078021930530667305, + "step": 2070 + }, + { + "epoch": 1.4985590778097984, + "grad_norm": 3.1386377811431885, + "learning_rate": 2.9389361421596725e-08, + "logits/chosen": -1.435152292251587, + "logits/rejected": -1.4320685863494873, + "logps/chosen": -49.169029235839844, + "logps/rejected": -53.54350662231445, + "loss": 0.687, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.004540695808827877, + "rewards/margins": 0.012549139559268951, + "rewards/rejected": -0.008008443750441074, + "step": 2080 + }, + { + "epoch": 1.505763688760807, + "grad_norm": 2.573927640914917, + "learning_rate": 2.9182858371940126e-08, + "logits/chosen": -1.5321205854415894, + "logits/rejected": -1.5178864002227783, + "logps/chosen": -42.592002868652344, + "logps/rejected": -46.14876174926758, + "loss": 0.6866, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.003068184945732355, + "rewards/margins": 0.013397050090134144, + "rewards/rejected": -0.01032886654138565, + "step": 2090 + }, + { + "epoch": 1.5129682997118157, + "grad_norm": 3.6754095554351807, + "learning_rate": 2.8976061284611908e-08, + "logits/chosen": -1.4699745178222656, + "logits/rejected": -1.4811229705810547, + "logps/chosen": -41.61306381225586, + "logps/rejected": -45.29481506347656, + "loss": 0.6877, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.004881677217781544, + "rewards/margins": 0.0111152408644557, + "rewards/rejected": -0.006233563646674156, + "step": 2100 + }, + { + "epoch": 1.5201729106628243, + "grad_norm": 3.2631003856658936, + "learning_rate": 2.8768984696593384e-08, + "logits/chosen": -1.4801521301269531, + "logits/rejected": -1.4636070728302002, + "logps/chosen": -44.76102828979492, + "logps/rejected": -47.91448211669922, + "loss": 0.6868, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.005100742448121309, + "rewards/margins": 0.012960699386894703, + "rewards/rejected": -0.007859956473112106, + "step": 2110 + }, + { + "epoch": 1.527377521613833, + "grad_norm": 3.2338225841522217, + "learning_rate": 2.8561643164513637e-08, + "logits/chosen": -1.3346589803695679, + "logits/rejected": -1.318894386291504, + "logps/chosen": -51.97467803955078, + "logps/rejected": -54.27916717529297, + "loss": 0.6889, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.003795693162828684, + "rewards/margins": 0.008763305842876434, + "rewards/rejected": -0.0049676112830638885, + "step": 2120 + }, + { + "epoch": 1.5345821325648417, + "grad_norm": 3.310194253921509, + "learning_rate": 2.8354051263626227e-08, + "logits/chosen": -1.4670060873031616, + "logits/rejected": -1.4673092365264893, + "logps/chosen": -50.2790641784668, + "logps/rejected": -52.83305740356445, + "loss": 0.6885, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.0032279961742460728, + "rewards/margins": 0.00951780378818512, + "rewards/rejected": -0.0062898085452616215, + "step": 2130 + }, + { + "epoch": 1.54178674351585, + "grad_norm": 4.669538497924805, + "learning_rate": 2.8146223586784573e-08, + "logits/chosen": -1.4577587842941284, + "logits/rejected": -1.4446860551834106, + "logps/chosen": -52.09587860107422, + "logps/rejected": -54.9838981628418, + "loss": 0.6872, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.003598356619477272, + "rewards/margins": 0.012193666771054268, + "rewards/rejected": -0.008595308288931847, + "step": 2140 + }, + { + "epoch": 1.5489913544668588, + "grad_norm": 3.397944450378418, + "learning_rate": 2.7938174743416205e-08, + "logits/chosen": -1.362642765045166, + "logits/rejected": -1.3556791543960571, + "logps/chosen": -51.507118225097656, + "logps/rejected": -55.4406852722168, + "loss": 0.6877, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.0027351544704288244, + "rewards/margins": 0.011117152869701385, + "rewards/rejected": -0.008381998166441917, + "step": 2150 + }, + { + "epoch": 1.5561959654178674, + "grad_norm": 3.0584213733673096, + "learning_rate": 2.7729919358495728e-08, + "logits/chosen": -1.5042486190795898, + "logits/rejected": -1.4946848154067993, + "logps/chosen": -52.332984924316406, + "logps/rejected": -53.52936935424805, + "loss": 0.687, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.0036446847952902317, + "rewards/margins": 0.012581204064190388, + "rewards/rejected": -0.008936519734561443, + "step": 2160 + }, + { + "epoch": 1.563400576368876, + "grad_norm": 3.8265929222106934, + "learning_rate": 2.7521472071516772e-08, + "logits/chosen": -1.4729335308074951, + "logits/rejected": -1.4664169549942017, + "logps/chosen": -43.67477035522461, + "logps/rejected": -47.45640182495117, + "loss": 0.6882, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.005746934562921524, + "rewards/margins": 0.010086534544825554, + "rewards/rejected": -0.0043396009132266045, + "step": 2170 + }, + { + "epoch": 1.5706051873198847, + "grad_norm": 3.8936140537261963, + "learning_rate": 2.731284753546289e-08, + "logits/chosen": -1.4814698696136475, + "logits/rejected": -1.4747785329818726, + "logps/chosen": -53.028472900390625, + "logps/rejected": -56.810150146484375, + "loss": 0.6893, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.00028875062707811594, + "rewards/margins": 0.007881390862166882, + "rewards/rejected": -0.008170142769813538, + "step": 2180 + }, + { + "epoch": 1.5778097982708934, + "grad_norm": 4.052019119262695, + "learning_rate": 2.710406041577751e-08, + "logits/chosen": -1.5514274835586548, + "logits/rejected": -1.5482257604599, + "logps/chosen": -47.992862701416016, + "logps/rejected": -53.79439163208008, + "loss": 0.6881, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.004318216349929571, + "rewards/margins": 0.010280657559633255, + "rewards/rejected": -0.005962441675364971, + "step": 2190 + }, + { + "epoch": 1.585014409221902, + "grad_norm": 3.321918487548828, + "learning_rate": 2.6895125389333017e-08, + "logits/chosen": -1.537071943283081, + "logits/rejected": -1.5222880840301514, + "logps/chosen": -48.4653434753418, + "logps/rejected": -52.60175704956055, + "loss": 0.6853, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.006970447953790426, + "rewards/margins": 0.015999775379896164, + "rewards/rejected": -0.009029326029121876, + "step": 2200 + }, + { + "epoch": 1.5922190201729105, + "grad_norm": 3.160768508911133, + "learning_rate": 2.6686057143399028e-08, + "logits/chosen": -1.5062367916107178, + "logits/rejected": -1.4982922077178955, + "logps/chosen": -48.509521484375, + "logps/rejected": -50.001853942871094, + "loss": 0.6882, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.004941598977893591, + "rewards/margins": 0.010229108855128288, + "rewards/rejected": -0.00528750941157341, + "step": 2210 + }, + { + "epoch": 1.5994236311239192, + "grad_norm": 3.687347888946533, + "learning_rate": 2.647687037460996e-08, + "logits/chosen": -1.4847967624664307, + "logits/rejected": -1.4772911071777344, + "logps/chosen": -52.836029052734375, + "logps/rejected": -58.44087600708008, + "loss": 0.6861, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.006661810912191868, + "rewards/margins": 0.014402936212718487, + "rewards/rejected": -0.007741124834865332, + "step": 2220 + }, + { + "epoch": 1.6066282420749278, + "grad_norm": 3.245283365249634, + "learning_rate": 2.626757978793187e-08, + "logits/chosen": -1.5062129497528076, + "logits/rejected": -1.4990017414093018, + "logps/chosen": -48.865169525146484, + "logps/rejected": -52.4692497253418, + "loss": 0.6888, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.00014934074715711176, + "rewards/margins": 0.008889252319931984, + "rewards/rejected": -0.009038591757416725, + "step": 2230 + }, + { + "epoch": 1.6138328530259365, + "grad_norm": 2.9634509086608887, + "learning_rate": 2.6058200095628797e-08, + "logits/chosen": -1.5059670209884644, + "logits/rejected": -1.5062869787216187, + "logps/chosen": -40.85955047607422, + "logps/rejected": -46.7325325012207, + "loss": 0.685, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.004564112052321434, + "rewards/margins": 0.016638968139886856, + "rewards/rejected": -0.012074857018887997, + "step": 2240 + }, + { + "epoch": 1.6210374639769451, + "grad_norm": 3.270120143890381, + "learning_rate": 2.584874601622854e-08, + "logits/chosen": -1.566474199295044, + "logits/rejected": -1.5492124557495117, + "logps/chosen": -49.3926887512207, + "logps/rejected": -53.256019592285156, + "loss": 0.6888, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.002326581161469221, + "rewards/margins": 0.008950329385697842, + "rewards/rejected": -0.006623747758567333, + "step": 2250 + }, + { + "epoch": 1.6282420749279538, + "grad_norm": 3.055609941482544, + "learning_rate": 2.5639232273487993e-08, + "logits/chosen": -1.4605876207351685, + "logits/rejected": -1.4407970905303955, + "logps/chosen": -44.329551696777344, + "logps/rejected": -47.6479377746582, + "loss": 0.6878, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.004232374019920826, + "rewards/margins": 0.011020188219845295, + "rewards/rejected": -0.006787814199924469, + "step": 2260 + }, + { + "epoch": 1.6354466858789625, + "grad_norm": 3.6103997230529785, + "learning_rate": 2.5429673595358142e-08, + "logits/chosen": -1.5238250494003296, + "logits/rejected": -1.5089406967163086, + "logps/chosen": -45.79985809326172, + "logps/rejected": -48.57173156738281, + "loss": 0.6878, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.0016669733449816704, + "rewards/margins": 0.011048417538404465, + "rewards/rejected": -0.009381445124745369, + "step": 2270 + }, + { + "epoch": 1.6426512968299711, + "grad_norm": 3.2970402240753174, + "learning_rate": 2.5220084712948764e-08, + "logits/chosen": -1.4582303762435913, + "logits/rejected": -1.4462206363677979, + "logps/chosen": -52.10322952270508, + "logps/rejected": -55.221832275390625, + "loss": 0.6898, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0028122267685830593, + "rewards/margins": 0.006987334694713354, + "rewards/rejected": -0.004175108857452869, + "step": 2280 + }, + { + "epoch": 1.6498559077809798, + "grad_norm": 3.7503063678741455, + "learning_rate": 2.5010480359492838e-08, + "logits/chosen": -1.4644200801849365, + "logits/rejected": -1.4521420001983643, + "logps/chosen": -49.4357795715332, + "logps/rejected": -49.438568115234375, + "loss": 0.686, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.0039522526785731316, + "rewards/margins": 0.014734050258994102, + "rewards/rejected": -0.010781797580420971, + "step": 2290 + }, + { + "epoch": 1.6570605187319885, + "grad_norm": 2.9756903648376465, + "learning_rate": 2.480087526931091e-08, + "logits/chosen": -1.5041756629943848, + "logits/rejected": -1.4846851825714111, + "logps/chosen": -43.37569046020508, + "logps/rejected": -45.38811111450195, + "loss": 0.6863, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.003738057566806674, + "rewards/margins": 0.01406602282077074, + "rewards/rejected": -0.010327964089810848, + "step": 2300 + }, + { + "epoch": 1.6642651296829971, + "grad_norm": 3.4368813037872314, + "learning_rate": 2.4591284176775326e-08, + "logits/chosen": -1.44768226146698, + "logits/rejected": -1.4357998371124268, + "logps/chosen": -55.06958770751953, + "logps/rejected": -56.48256301879883, + "loss": 0.689, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.0038170614279806614, + "rewards/margins": 0.008528480306267738, + "rewards/rejected": -0.00471141841262579, + "step": 2310 + }, + { + "epoch": 1.6714697406340058, + "grad_norm": 2.8778862953186035, + "learning_rate": 2.4381721815274443e-08, + "logits/chosen": -1.5203001499176025, + "logits/rejected": -1.5137100219726562, + "logps/chosen": -43.21926498413086, + "logps/rejected": -46.39662551879883, + "loss": 0.687, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0016496287425979972, + "rewards/margins": 0.012687856331467628, + "rewards/rejected": -0.011038227006793022, + "step": 2320 + }, + { + "epoch": 1.6786743515850144, + "grad_norm": 3.1286635398864746, + "learning_rate": 2.4172202916176936e-08, + "logits/chosen": -1.5638360977172852, + "logits/rejected": -1.5550332069396973, + "logps/chosen": -43.0087776184082, + "logps/rejected": -47.81661605834961, + "loss": 0.6861, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0009016372496262193, + "rewards/margins": 0.014661784283816814, + "rewards/rejected": -0.01376014668494463, + "step": 2330 + }, + { + "epoch": 1.685878962536023, + "grad_norm": 3.64982271194458, + "learning_rate": 2.3962742207796268e-08, + "logits/chosen": -1.4485204219818115, + "logits/rejected": -1.4384351968765259, + "logps/chosen": -41.6459846496582, + "logps/rejected": -45.627601623535156, + "loss": 0.6849, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.004887954797595739, + "rewards/margins": 0.016865257173776627, + "rewards/rejected": -0.01197730004787445, + "step": 2340 + }, + { + "epoch": 1.6930835734870318, + "grad_norm": 3.615983247756958, + "learning_rate": 2.3753354414355334e-08, + "logits/chosen": -1.4232791662216187, + "logits/rejected": -1.400564432144165, + "logps/chosen": -53.550025939941406, + "logps/rejected": -55.16486358642578, + "loss": 0.6875, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.0003485208726488054, + "rewards/margins": 0.011735951527953148, + "rewards/rejected": -0.01138742920011282, + "step": 2350 + }, + { + "epoch": 1.7002881844380404, + "grad_norm": 3.3526298999786377, + "learning_rate": 2.3544054254951408e-08, + "logits/chosen": -1.4648194313049316, + "logits/rejected": -1.4448609352111816, + "logps/chosen": -42.864173889160156, + "logps/rejected": -48.36962127685547, + "loss": 0.6845, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.004195074550807476, + "rewards/margins": 0.01782260462641716, + "rewards/rejected": -0.01362752728164196, + "step": 2360 + }, + { + "epoch": 1.707492795389049, + "grad_norm": 3.3519835472106934, + "learning_rate": 2.3334856442521435e-08, + "logits/chosen": -1.5586761236190796, + "logits/rejected": -1.5402878522872925, + "logps/chosen": -51.24225616455078, + "logps/rejected": -51.37165451049805, + "loss": 0.688, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0026316409930586815, + "rewards/margins": 0.01046125777065754, + "rewards/rejected": -0.007829615846276283, + "step": 2370 + }, + { + "epoch": 1.7146974063400577, + "grad_norm": 3.329519033432007, + "learning_rate": 2.3125775682807826e-08, + "logits/chosen": -1.5537148714065552, + "logits/rejected": -1.5523698329925537, + "logps/chosen": -49.91655349731445, + "logps/rejected": -53.717132568359375, + "loss": 0.6862, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.0029881394002586603, + "rewards/margins": 0.014401605352759361, + "rewards/rejected": -0.01141346711665392, + "step": 2380 + }, + { + "epoch": 1.7219020172910664, + "grad_norm": 2.7364284992218018, + "learning_rate": 2.291682667332464e-08, + "logits/chosen": -1.6134541034698486, + "logits/rejected": -1.5998890399932861, + "logps/chosen": -46.44415283203125, + "logps/rejected": -49.58192825317383, + "loss": 0.6887, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0009153697756119072, + "rewards/margins": 0.009135574102401733, + "rewards/rejected": -0.008220205083489418, + "step": 2390 + }, + { + "epoch": 1.729106628242075, + "grad_norm": 2.911412477493286, + "learning_rate": 2.2708024102324454e-08, + "logits/chosen": -1.5305463075637817, + "logits/rejected": -1.5252015590667725, + "logps/chosen": -46.698795318603516, + "logps/rejected": -51.687110900878906, + "loss": 0.6853, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.0034352089278399944, + "rewards/margins": 0.016118764877319336, + "rewards/rejected": -0.012683555483818054, + "step": 2400 + }, + { + "epoch": 1.7363112391930837, + "grad_norm": 3.7019219398498535, + "learning_rate": 2.2499382647765797e-08, + "logits/chosen": -1.4968922138214111, + "logits/rejected": -1.4982818365097046, + "logps/chosen": -48.41173553466797, + "logps/rejected": -51.977378845214844, + "loss": 0.6876, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 8.274018910015002e-05, + "rewards/margins": 0.011399459093809128, + "rewards/rejected": -0.01131671853363514, + "step": 2410 + }, + { + "epoch": 1.7435158501440924, + "grad_norm": 2.8830807209014893, + "learning_rate": 2.2290916976281427e-08, + "logits/chosen": -1.4777326583862305, + "logits/rejected": -1.4632737636566162, + "logps/chosen": -43.695831298828125, + "logps/rejected": -46.05553436279297, + "loss": 0.6863, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.00010001249756896868, + "rewards/margins": 0.014169926755130291, + "rewards/rejected": -0.014269940555095673, + "step": 2420 + }, + { + "epoch": 1.7507204610951008, + "grad_norm": 3.572896957397461, + "learning_rate": 2.2082641742147238e-08, + "logits/chosen": -1.4719898700714111, + "logits/rejected": -1.4631872177124023, + "logps/chosen": -45.68647766113281, + "logps/rejected": -51.5733757019043, + "loss": 0.6867, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.0008903827401809394, + "rewards/margins": 0.013410898856818676, + "rewards/rejected": -0.01252051629126072, + "step": 2430 + }, + { + "epoch": 1.7579250720461095, + "grad_norm": 3.098632574081421, + "learning_rate": 2.1874571586252177e-08, + "logits/chosen": -1.5461061000823975, + "logits/rejected": -1.534361481666565, + "logps/chosen": -45.58122634887695, + "logps/rejected": -48.32319259643555, + "loss": 0.6878, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0007744136964902282, + "rewards/margins": 0.010953008197247982, + "rewards/rejected": -0.010178593918681145, + "step": 2440 + }, + { + "epoch": 1.7651296829971181, + "grad_norm": 2.4132089614868164, + "learning_rate": 2.1666721135069037e-08, + "logits/chosen": -1.515826940536499, + "logits/rejected": -1.5021852254867554, + "logps/chosen": -49.859291076660156, + "logps/rejected": -51.296424865722656, + "loss": 0.6876, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.0015759628731757402, + "rewards/margins": 0.011497067287564278, + "rewards/rejected": -0.009921105578541756, + "step": 2450 + }, + { + "epoch": 1.7723342939481268, + "grad_norm": 2.642758846282959, + "learning_rate": 2.145910499962628e-08, + "logits/chosen": -1.5767768621444702, + "logits/rejected": -1.555633544921875, + "logps/chosen": -43.99817657470703, + "logps/rejected": -46.1553955078125, + "loss": 0.6847, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.0037842721212655306, + "rewards/margins": 0.017482485622167587, + "rewards/rejected": -0.013698210939764977, + "step": 2460 + }, + { + "epoch": 1.7795389048991355, + "grad_norm": 3.895792245864868, + "learning_rate": 2.1251737774480915e-08, + "logits/chosen": -1.5495898723602295, + "logits/rejected": -1.5402934551239014, + "logps/chosen": -53.23638916015625, + "logps/rejected": -55.35784912109375, + "loss": 0.6872, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0016384575283154845, + "rewards/margins": 0.012212954461574554, + "rewards/rejected": -0.010574499145150185, + "step": 2470 + }, + { + "epoch": 1.7867435158501441, + "grad_norm": 2.5514557361602783, + "learning_rate": 2.104463403669264e-08, + "logits/chosen": -1.4776699542999268, + "logits/rejected": -1.4571102857589722, + "logps/chosen": -49.018821716308594, + "logps/rejected": -51.24462127685547, + "loss": 0.6864, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0007808968657627702, + "rewards/margins": 0.013993730768561363, + "rewards/rejected": -0.013212834484875202, + "step": 2480 + }, + { + "epoch": 1.7939481268011528, + "grad_norm": 2.6605591773986816, + "learning_rate": 2.0837808344799028e-08, + "logits/chosen": -1.452739953994751, + "logits/rejected": -1.4367029666900635, + "logps/chosen": -43.84040832519531, + "logps/rejected": -47.514102935791016, + "loss": 0.6839, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.007030940148979425, + "rewards/margins": 0.01895783096551895, + "rewards/rejected": -0.011926891282200813, + "step": 2490 + }, + { + "epoch": 1.8011527377521612, + "grad_norm": 3.2000277042388916, + "learning_rate": 2.063127523779219e-08, + "logits/chosen": -1.4303696155548096, + "logits/rejected": -1.429574728012085, + "logps/chosen": -44.83720016479492, + "logps/rejected": -51.28551483154297, + "loss": 0.6836, + "rewards/accuracies": 0.71875, + "rewards/chosen": 0.003173623699694872, + "rewards/margins": 0.01957201212644577, + "rewards/rejected": -0.016398390755057335, + "step": 2500 + }, + { + "epoch": 1.8083573487031699, + "grad_norm": 3.828165054321289, + "learning_rate": 2.0425049234096737e-08, + "logits/chosen": -1.486255407333374, + "logits/rejected": -1.4714888334274292, + "logps/chosen": -49.1246452331543, + "logps/rejected": -51.77213668823242, + "loss": 0.6863, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": 7.560032099718228e-05, + "rewards/margins": 0.014201045036315918, + "rewards/rejected": -0.014125445857644081, + "step": 2510 + }, + { + "epoch": 1.8155619596541785, + "grad_norm": 2.640105962753296, + "learning_rate": 2.0219144830549163e-08, + "logits/chosen": -1.4610233306884766, + "logits/rejected": -1.4512735605239868, + "logps/chosen": -48.97557067871094, + "logps/rejected": -52.63092803955078, + "loss": 0.6849, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.001075071981176734, + "rewards/margins": 0.017014745622873306, + "rewards/rejected": -0.015939675271511078, + "step": 2520 + }, + { + "epoch": 1.8227665706051872, + "grad_norm": 2.893141269683838, + "learning_rate": 2.0013576501378823e-08, + "logits/chosen": -1.4366611242294312, + "logits/rejected": -1.4269273281097412, + "logps/chosen": -44.668251037597656, + "logps/rejected": -48.838382720947266, + "loss": 0.6818, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.007614838890731335, + "rewards/margins": 0.023311858996748924, + "rewards/rejected": -0.015697021037340164, + "step": 2530 + }, + { + "epoch": 1.8299711815561959, + "grad_norm": 3.5534820556640625, + "learning_rate": 1.9808358697190426e-08, + "logits/chosen": -1.462631344795227, + "logits/rejected": -1.4623886346817017, + "logps/chosen": -39.994834899902344, + "logps/rejected": -45.32501220703125, + "loss": 0.6846, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.0002226762444479391, + "rewards/margins": 0.017688129097223282, + "rewards/rejected": -0.017910804599523544, + "step": 2540 + }, + { + "epoch": 1.8371757925072045, + "grad_norm": 3.05342698097229, + "learning_rate": 1.9603505843948214e-08, + "logits/chosen": -1.4901165962219238, + "logits/rejected": -1.4695804119110107, + "logps/chosen": -41.027496337890625, + "logps/rejected": -46.306419372558594, + "loss": 0.6864, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.0012056769337505102, + "rewards/margins": 0.013855445198714733, + "rewards/rejected": -0.012649768963456154, + "step": 2550 + }, + { + "epoch": 1.8443804034582132, + "grad_norm": 3.029517412185669, + "learning_rate": 1.9399032341961886e-08, + "logits/chosen": -1.4612455368041992, + "logits/rejected": -1.4414364099502563, + "logps/chosen": -44.05613708496094, + "logps/rejected": -45.91584777832031, + "loss": 0.6869, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.0033496886026114225, + "rewards/margins": 0.012948046438395977, + "rewards/rejected": -0.009598356671631336, + "step": 2560 + }, + { + "epoch": 1.8515850144092219, + "grad_norm": 3.660400867462158, + "learning_rate": 1.9194952564874323e-08, + "logits/chosen": -1.490746259689331, + "logits/rejected": -1.478116750717163, + "logps/chosen": -49.373924255371094, + "logps/rejected": -52.70782470703125, + "loss": 0.686, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0015121384058147669, + "rewards/margins": 0.014797654934227467, + "rewards/rejected": -0.013285515829920769, + "step": 2570 + }, + { + "epoch": 1.8587896253602305, + "grad_norm": 2.9589059352874756, + "learning_rate": 1.8991280858651157e-08, + "logits/chosen": -1.4661362171173096, + "logits/rejected": -1.4451956748962402, + "logps/chosen": -48.044097900390625, + "logps/rejected": -49.68541717529297, + "loss": 0.6863, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.000995612470433116, + "rewards/margins": 0.014148500747978687, + "rewards/rejected": -0.013152887113392353, + "step": 2580 + }, + { + "epoch": 1.8659942363112392, + "grad_norm": 3.7703192234039307, + "learning_rate": 1.8788031540572327e-08, + "logits/chosen": -1.4331156015396118, + "logits/rejected": -1.4193694591522217, + "logps/chosen": -43.3116569519043, + "logps/rejected": -47.158424377441406, + "loss": 0.6848, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0016503268852829933, + "rewards/margins": 0.01719464734196663, + "rewards/rejected": -0.015544322319328785, + "step": 2590 + }, + { + "epoch": 1.8731988472622478, + "grad_norm": 3.501920700073242, + "learning_rate": 1.858521889822565e-08, + "logits/chosen": -1.481728434562683, + "logits/rejected": -1.4720659255981445, + "logps/chosen": -44.771629333496094, + "logps/rejected": -47.331321716308594, + "loss": 0.6875, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.00216684746555984, + "rewards/margins": 0.01176757737994194, + "rewards/rejected": -0.009600730612874031, + "step": 2600 + }, + { + "epoch": 1.8804034582132565, + "grad_norm": 3.0656304359436035, + "learning_rate": 1.8382857188502422e-08, + "logits/chosen": -1.480365514755249, + "logits/rejected": -1.4653451442718506, + "logps/chosen": -43.37881851196289, + "logps/rejected": -46.211647033691406, + "loss": 0.6857, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": 0.001201205188408494, + "rewards/margins": 0.015598386526107788, + "rewards/rejected": -0.014397179707884789, + "step": 2610 + }, + { + "epoch": 1.8876080691642652, + "grad_norm": 3.0691475868225098, + "learning_rate": 1.8180960636595234e-08, + "logits/chosen": -1.4347946643829346, + "logits/rejected": -1.4240456819534302, + "logps/chosen": -45.46348190307617, + "logps/rejected": -48.79424285888672, + "loss": 0.6844, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.000794556166511029, + "rewards/margins": 0.01804800145328045, + "rewards/rejected": -0.017253447324037552, + "step": 2620 + }, + { + "epoch": 1.8948126801152738, + "grad_norm": 2.6301233768463135, + "learning_rate": 1.7979543434998015e-08, + "logits/chosen": -1.5175861120224, + "logits/rejected": -1.5130423307418823, + "logps/chosen": -54.06511688232422, + "logps/rejected": -55.68177032470703, + "loss": 0.6888, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0019837587606161833, + "rewards/margins": 0.009002977050840855, + "rewards/rejected": -0.010986736044287682, + "step": 2630 + }, + { + "epoch": 1.9020172910662825, + "grad_norm": 3.1905744075775146, + "learning_rate": 1.7778619742508345e-08, + "logits/chosen": -1.4991130828857422, + "logits/rejected": -1.4788715839385986, + "logps/chosen": -48.78097152709961, + "logps/rejected": -50.549293518066406, + "loss": 0.6862, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0013503096997737885, + "rewards/margins": 0.01445526909083128, + "rewards/rejected": -0.015805575996637344, + "step": 2640 + }, + { + "epoch": 1.9092219020172911, + "grad_norm": 5.37537145614624, + "learning_rate": 1.757820368323213e-08, + "logits/chosen": -1.4480946063995361, + "logits/rejected": -1.431839108467102, + "logps/chosen": -55.62885284423828, + "logps/rejected": -60.84233856201172, + "loss": 0.6854, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.00024756681523285806, + "rewards/margins": 0.01604391634464264, + "rewards/rejected": -0.016291480511426926, + "step": 2650 + }, + { + "epoch": 1.9164265129682998, + "grad_norm": 2.7058212757110596, + "learning_rate": 1.7378309345590803e-08, + "logits/chosen": -1.5186960697174072, + "logits/rejected": -1.521729826927185, + "logps/chosen": -48.12525177001953, + "logps/rejected": -51.73346710205078, + "loss": 0.6859, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0006261245580390096, + "rewards/margins": 0.014795279130339622, + "rewards/rejected": -0.01416915375739336, + "step": 2660 + }, + { + "epoch": 1.9236311239193085, + "grad_norm": 3.0411376953125, + "learning_rate": 1.717895078133088e-08, + "logits/chosen": -1.537657618522644, + "logits/rejected": -1.5279037952423096, + "logps/chosen": -45.69334411621094, + "logps/rejected": -50.841529846191406, + "loss": 0.6846, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": 0.001616945257410407, + "rewards/margins": 0.017612287774682045, + "rewards/rejected": -0.015995342284440994, + "step": 2670 + }, + { + "epoch": 1.9308357348703171, + "grad_norm": 2.9507884979248047, + "learning_rate": 1.698014200453624e-08, + "logits/chosen": -1.512743353843689, + "logits/rejected": -1.516052007675171, + "logps/chosen": -48.50223922729492, + "logps/rejected": -53.1720085144043, + "loss": 0.6894, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.0005373925669118762, + "rewards/margins": 0.007933690212666988, + "rewards/rejected": -0.007396298460662365, + "step": 2680 + }, + { + "epoch": 1.9380403458213258, + "grad_norm": 3.1136631965637207, + "learning_rate": 1.6781896990642964e-08, + "logits/chosen": -1.4173403978347778, + "logits/rejected": -1.4077566862106323, + "logps/chosen": -53.65788650512695, + "logps/rejected": -55.55950927734375, + "loss": 0.6876, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.0008957240497693419, + "rewards/margins": 0.011654635891318321, + "rewards/rejected": -0.010758912190794945, + "step": 2690 + }, + { + "epoch": 1.9452449567723344, + "grad_norm": 3.702500343322754, + "learning_rate": 1.658422967545693e-08, + "logits/chosen": -1.545461893081665, + "logits/rejected": -1.5236259698867798, + "logps/chosen": -46.60261917114258, + "logps/rejected": -48.830589294433594, + "loss": 0.6866, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.0023856277111917734, + "rewards/margins": 0.013566548936069012, + "rewards/rejected": -0.015952177345752716, + "step": 2700 + }, + { + "epoch": 1.952449567723343, + "grad_norm": 3.34963059425354, + "learning_rate": 1.638715395417418e-08, + "logits/chosen": -1.5168092250823975, + "logits/rejected": -1.5006424188613892, + "logps/chosen": -47.71814727783203, + "logps/rejected": -50.16883087158203, + "loss": 0.6873, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.0012928284704685211, + "rewards/margins": 0.011976560577750206, + "rewards/rejected": -0.013269389048218727, + "step": 2710 + }, + { + "epoch": 1.9596541786743515, + "grad_norm": 3.404601573944092, + "learning_rate": 1.619068368040416e-08, + "logits/chosen": -1.5042836666107178, + "logits/rejected": -1.4942996501922607, + "logps/chosen": -42.337650299072266, + "logps/rejected": -47.94048309326172, + "loss": 0.6849, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.0009236250771209598, + "rewards/margins": 0.016973894089460373, + "rewards/rejected": -0.016050271689891815, + "step": 2720 + }, + { + "epoch": 1.9668587896253602, + "grad_norm": 3.3033130168914795, + "learning_rate": 1.5994832665195853e-08, + "logits/chosen": -1.4348411560058594, + "logits/rejected": -1.4287351369857788, + "logps/chosen": -46.385520935058594, + "logps/rejected": -48.763214111328125, + "loss": 0.6874, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0006011429941281676, + "rewards/margins": 0.011924292892217636, + "rewards/rejected": -0.01132314931601286, + "step": 2730 + }, + { + "epoch": 1.9740634005763689, + "grad_norm": 3.290114641189575, + "learning_rate": 1.5799614676066906e-08, + "logits/chosen": -1.5618253946304321, + "logits/rejected": -1.5571167469024658, + "logps/chosen": -42.574127197265625, + "logps/rejected": -47.01802444458008, + "loss": 0.6854, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0009528351947665215, + "rewards/margins": 0.016010040417313576, + "rewards/rejected": -0.016962874680757523, + "step": 2740 + }, + { + "epoch": 1.9812680115273775, + "grad_norm": 2.788604974746704, + "learning_rate": 1.560504343603587e-08, + "logits/chosen": -1.4581258296966553, + "logits/rejected": -1.460876226425171, + "logps/chosen": -47.58959197998047, + "logps/rejected": -53.10105514526367, + "loss": 0.6864, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.0016212237533181906, + "rewards/margins": 0.01379337441176176, + "rewards/rejected": -0.012172150425612926, + "step": 2750 + }, + { + "epoch": 1.9884726224783862, + "grad_norm": 2.688870668411255, + "learning_rate": 1.541113262265748e-08, + "logits/chosen": -1.5588115453720093, + "logits/rejected": -1.5540285110473633, + "logps/chosen": -47.810081481933594, + "logps/rejected": -51.99542236328125, + "loss": 0.6856, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.0008094706572592258, + "rewards/margins": 0.01555786095559597, + "rewards/rejected": -0.01474839262664318, + "step": 2760 + }, + { + "epoch": 1.9956772334293948, + "grad_norm": 2.7986905574798584, + "learning_rate": 1.5217895867061227e-08, + "logits/chosen": -1.4807400703430176, + "logits/rejected": -1.4691799879074097, + "logps/chosen": -49.02349090576172, + "logps/rejected": -51.68681716918945, + "loss": 0.6855, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -1.1304626241326332e-05, + "rewards/margins": 0.015814241021871567, + "rewards/rejected": -0.01582554541528225, + "step": 2770 + }, + { + "epoch": 2.0028818443804033, + "grad_norm": 3.2165708541870117, + "learning_rate": 1.5025346752993098e-08, + "logits/chosen": -1.4738900661468506, + "logits/rejected": -1.4782806634902954, + "logps/chosen": -47.22654724121094, + "logps/rejected": -51.38325119018555, + "loss": 0.6888, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.002990271197631955, + "rewards/margins": 0.009079065173864365, + "rewards/rejected": -0.012069336138665676, + "step": 2780 + }, + { + "epoch": 2.010086455331412, + "grad_norm": 3.1721432209014893, + "learning_rate": 1.4833498815860756e-08, + "logits/chosen": -1.6040818691253662, + "logits/rejected": -1.5952320098876953, + "logps/chosen": -44.67755889892578, + "logps/rejected": -49.35480499267578, + "loss": 0.6835, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0012609490659087896, + "rewards/margins": 0.019852185621857643, + "rewards/rejected": -0.01859123632311821, + "step": 2790 + }, + { + "epoch": 2.0172910662824206, + "grad_norm": 3.4663138389587402, + "learning_rate": 1.4642365541781993e-08, + "logits/chosen": -1.4190483093261719, + "logits/rejected": -1.402295470237732, + "logps/chosen": -46.372066497802734, + "logps/rejected": -51.25910568237305, + "loss": 0.6851, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.0015671824803575873, + "rewards/margins": 0.016764571890234947, + "rewards/rejected": -0.018331754952669144, + "step": 2800 + }, + { + "epoch": 2.0244956772334293, + "grad_norm": 3.538506507873535, + "learning_rate": 1.4451960366636745e-08, + "logits/chosen": -1.5058993101119995, + "logits/rejected": -1.510118007659912, + "logps/chosen": -50.21342086791992, + "logps/rejected": -54.794654846191406, + "loss": 0.6862, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": 0.00038042213418520987, + "rewards/margins": 0.014330941252410412, + "rewards/rejected": -0.013950521126389503, + "step": 2810 + }, + { + "epoch": 2.031700288184438, + "grad_norm": 2.9821181297302246, + "learning_rate": 1.4262296675122592e-08, + "logits/chosen": -1.506753921508789, + "logits/rejected": -1.491409420967102, + "logps/chosen": -43.888484954833984, + "logps/rejected": -48.556732177734375, + "loss": 0.685, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.00017104865401051939, + "rewards/margins": 0.01672585867345333, + "rewards/rejected": -0.01689690724015236, + "step": 2820 + }, + { + "epoch": 2.0389048991354466, + "grad_norm": 3.436539888381958, + "learning_rate": 1.407338779981389e-08, + "logits/chosen": -1.4747148752212524, + "logits/rejected": -1.4630435705184937, + "logps/chosen": -41.41279602050781, + "logps/rejected": -46.371131896972656, + "loss": 0.6833, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0008243538322858512, + "rewards/margins": 0.02022518590092659, + "rewards/rejected": -0.021049540489912033, + "step": 2830 + }, + { + "epoch": 2.0461095100864553, + "grad_norm": 3.1391549110412598, + "learning_rate": 1.3885247020224534e-08, + "logits/chosen": -1.4744846820831299, + "logits/rejected": -1.4637953042984009, + "logps/chosen": -40.93677520751953, + "logps/rejected": -44.24010467529297, + "loss": 0.6836, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": 0.0011112012434750795, + "rewards/margins": 0.01986369863152504, + "rewards/rejected": -0.01875249855220318, + "step": 2840 + }, + { + "epoch": 2.053314121037464, + "grad_norm": 2.7591843605041504, + "learning_rate": 1.369788756187445e-08, + "logits/chosen": -1.5235010385513306, + "logits/rejected": -1.5118696689605713, + "logps/chosen": -46.81077194213867, + "logps/rejected": -48.06627655029297, + "loss": 0.6879, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.002389531582593918, + "rewards/margins": 0.011110137216746807, + "rewards/rejected": -0.0134996697306633, + "step": 2850 + }, + { + "epoch": 2.0605187319884726, + "grad_norm": 3.157299518585205, + "learning_rate": 1.3511322595359925e-08, + "logits/chosen": -1.532439112663269, + "logits/rejected": -1.5212130546569824, + "logps/chosen": -43.265541076660156, + "logps/rejected": -48.95451354980469, + "loss": 0.684, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.0006747387815266848, + "rewards/margins": 0.01883287914097309, + "rewards/rejected": -0.019507618620991707, + "step": 2860 + }, + { + "epoch": 2.0677233429394812, + "grad_norm": 3.223790407180786, + "learning_rate": 1.3325565235427716e-08, + "logits/chosen": -1.5531320571899414, + "logits/rejected": -1.5446122884750366, + "logps/chosen": -45.25885772705078, + "logps/rejected": -49.23384475708008, + "loss": 0.6846, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0007360210875049233, + "rewards/margins": 0.017545271664857864, + "rewards/rejected": -0.01828129217028618, + "step": 2870 + }, + { + "epoch": 2.07492795389049, + "grad_norm": 3.642242670059204, + "learning_rate": 1.3140628540053218e-08, + "logits/chosen": -1.4586588144302368, + "logits/rejected": -1.45664381980896, + "logps/chosen": -45.87567138671875, + "logps/rejected": -49.37241744995117, + "loss": 0.6856, + "rewards/accuracies": 0.6812499761581421, + "rewards/chosen": 0.002558287465944886, + "rewards/margins": 0.015417991206049919, + "rewards/rejected": -0.012859704904258251, + "step": 2880 + }, + { + "epoch": 2.0821325648414986, + "grad_norm": 3.986879587173462, + "learning_rate": 1.2956525509522451e-08, + "logits/chosen": -1.4353663921356201, + "logits/rejected": -1.4404122829437256, + "logps/chosen": -47.817115783691406, + "logps/rejected": -51.359764099121094, + "loss": 0.6874, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": 0.001873530214652419, + "rewards/margins": 0.011945443227887154, + "rewards/rejected": -0.010071912780404091, + "step": 2890 + }, + { + "epoch": 2.089337175792507, + "grad_norm": 3.8390724658966064, + "learning_rate": 1.2773269085518267e-08, + "logits/chosen": -1.516808032989502, + "logits/rejected": -1.5112401247024536, + "logps/chosen": -52.496673583984375, + "logps/rejected": -56.082305908203125, + "loss": 0.6867, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.0004946159897372127, + "rewards/margins": 0.013363862410187721, + "rewards/rejected": -0.012869248166680336, + "step": 2900 + }, + { + "epoch": 2.096541786743516, + "grad_norm": 2.6431021690368652, + "learning_rate": 1.2590872150210574e-08, + "logits/chosen": -1.5927358865737915, + "logits/rejected": -1.5763109922409058, + "logps/chosen": -45.6403694152832, + "logps/rejected": -47.73447799682617, + "loss": 0.684, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.004319643136113882, + "rewards/margins": 0.019127164036035538, + "rewards/rejected": -0.023446807637810707, + "step": 2910 + }, + { + "epoch": 2.1037463976945245, + "grad_norm": 2.831928014755249, + "learning_rate": 1.2409347525350775e-08, + "logits/chosen": -1.5002295970916748, + "logits/rejected": -1.481815218925476, + "logps/chosen": -47.42546844482422, + "logps/rejected": -51.29052734375, + "loss": 0.6834, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.0010362366447225213, + "rewards/margins": 0.02007184363901615, + "rewards/rejected": -0.01903560571372509, + "step": 2920 + }, + { + "epoch": 2.110951008645533, + "grad_norm": 3.4328200817108154, + "learning_rate": 1.2228707971370421e-08, + "logits/chosen": -1.497158169746399, + "logits/rejected": -1.4785023927688599, + "logps/chosen": -42.02567672729492, + "logps/rejected": -44.48183059692383, + "loss": 0.6841, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": 0.002565313596278429, + "rewards/margins": 0.01863184943795204, + "rewards/rejected": -0.01606653444468975, + "step": 2930 + }, + { + "epoch": 2.118155619596542, + "grad_norm": 4.378489017486572, + "learning_rate": 1.2048966186484282e-08, + "logits/chosen": -1.5265371799468994, + "logits/rejected": -1.496654987335205, + "logps/chosen": -52.09857940673828, + "logps/rejected": -54.9652099609375, + "loss": 0.6863, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0012157706078141928, + "rewards/margins": 0.014177958481013775, + "rewards/rejected": -0.01539373118430376, + "step": 2940 + }, + { + "epoch": 2.1253602305475505, + "grad_norm": 3.2968902587890625, + "learning_rate": 1.187013480579762e-08, + "logits/chosen": -1.4905649423599243, + "logits/rejected": -1.4843100309371948, + "logps/chosen": -45.35689163208008, + "logps/rejected": -49.30203628540039, + "loss": 0.6843, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.004151071421802044, + "rewards/margins": 0.01855386421084404, + "rewards/rejected": -0.02270493470132351, + "step": 2950 + }, + { + "epoch": 2.132564841498559, + "grad_norm": 4.340986251831055, + "learning_rate": 1.1692226400418073e-08, + "logits/chosen": -1.4142036437988281, + "logits/rejected": -1.406091332435608, + "logps/chosen": -48.97296142578125, + "logps/rejected": -51.94805145263672, + "loss": 0.6856, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.0036544338800013065, + "rewards/margins": 0.015584975481033325, + "rewards/rejected": -0.019239408895373344, + "step": 2960 + }, + { + "epoch": 2.139769452449568, + "grad_norm": 2.482980966567993, + "learning_rate": 1.1515253476571923e-08, + "logits/chosen": -1.4493087530136108, + "logits/rejected": -1.4434731006622314, + "logps/chosen": -44.389686584472656, + "logps/rejected": -50.919029235839844, + "loss": 0.6844, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.003044532146304846, + "rewards/margins": 0.018028805032372475, + "rewards/rejected": -0.021073337644338608, + "step": 2970 + }, + { + "epoch": 2.1469740634005765, + "grad_norm": 3.3252360820770264, + "learning_rate": 1.133922847472496e-08, + "logits/chosen": -1.4918544292449951, + "logits/rejected": -1.4879610538482666, + "logps/chosen": -52.49553298950195, + "logps/rejected": -54.95599365234375, + "loss": 0.6852, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.00021512620151042938, + "rewards/margins": 0.01650981977581978, + "rewards/rejected": -0.016724945977330208, + "step": 2980 + }, + { + "epoch": 2.154178674351585, + "grad_norm": 3.3002827167510986, + "learning_rate": 1.1164163768707952e-08, + "logits/chosen": -1.4666965007781982, + "logits/rejected": -1.4556134939193726, + "logps/chosen": -47.383094787597656, + "logps/rejected": -51.5208740234375, + "loss": 0.6828, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.00024200770712923259, + "rewards/margins": 0.021655386313796043, + "rewards/rejected": -0.021897396072745323, + "step": 2990 + }, + { + "epoch": 2.161383285302594, + "grad_norm": 3.350294589996338, + "learning_rate": 1.0990071664846861e-08, + "logits/chosen": -1.4408986568450928, + "logits/rejected": -1.4307146072387695, + "logps/chosen": -48.706851959228516, + "logps/rejected": -53.8525276184082, + "loss": 0.6822, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": 0.0007291844231076539, + "rewards/margins": 0.02262338064610958, + "rewards/rejected": -0.021894199773669243, + "step": 3000 + }, + { + "epoch": 2.1685878962536025, + "grad_norm": 3.0558321475982666, + "learning_rate": 1.0816964401097739e-08, + "logits/chosen": -1.4839801788330078, + "logits/rejected": -1.473850965499878, + "logps/chosen": -42.997833251953125, + "logps/rejected": -45.74707794189453, + "loss": 0.6859, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0003010531945619732, + "rewards/margins": 0.015322072431445122, + "rewards/rejected": -0.015623128041625023, + "step": 3010 + }, + { + "epoch": 2.175792507204611, + "grad_norm": 3.894000768661499, + "learning_rate": 1.0644854146186406e-08, + "logits/chosen": -1.5161851644515991, + "logits/rejected": -1.4984285831451416, + "logps/chosen": -48.15696716308594, + "logps/rejected": -52.829368591308594, + "loss": 0.6831, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.001691700192168355, + "rewards/margins": 0.021001458168029785, + "rewards/rejected": -0.02269316092133522, + "step": 3020 + }, + { + "epoch": 2.18299711815562, + "grad_norm": 3.200721025466919, + "learning_rate": 1.0473752998753114e-08, + "logits/chosen": -1.4961122274398804, + "logits/rejected": -1.4744278192520142, + "logps/chosen": -48.510780334472656, + "logps/rejected": -51.6027717590332, + "loss": 0.6826, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0015695259207859635, + "rewards/margins": 0.021745964884757996, + "rewards/rejected": -0.020176438614726067, + "step": 3030 + }, + { + "epoch": 2.1902017291066285, + "grad_norm": 2.9486958980560303, + "learning_rate": 1.030367298650201e-08, + "logits/chosen": -1.494507074356079, + "logits/rejected": -1.4939751625061035, + "logps/chosen": -48.71635818481445, + "logps/rejected": -53.54632568359375, + "loss": 0.6874, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.003486091736704111, + "rewards/margins": 0.01207701861858368, + "rewards/rejected": -0.015563109889626503, + "step": 3040 + }, + { + "epoch": 2.1974063400576367, + "grad_norm": 3.8673534393310547, + "learning_rate": 1.0134626065355675e-08, + "logits/chosen": -1.5955169200897217, + "logits/rejected": -1.5843254327774048, + "logps/chosen": -49.311912536621094, + "logps/rejected": -52.8543586730957, + "loss": 0.6827, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.0020087913144379854, + "rewards/margins": 0.02148135006427765, + "rewards/rejected": -0.01947256177663803, + "step": 3050 + }, + { + "epoch": 2.2046109510086453, + "grad_norm": 3.485724449157715, + "learning_rate": 9.966624118614611e-09, + "logits/chosen": -1.4923583269119263, + "logits/rejected": -1.4730615615844727, + "logps/chosen": -52.26487350463867, + "logps/rejected": -55.42426681518555, + "loss": 0.6839, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0026403036899864674, + "rewards/margins": 0.0191368255764246, + "rewards/rejected": -0.01649652048945427, + "step": 3060 + }, + { + "epoch": 2.211815561959654, + "grad_norm": 2.4057836532592773, + "learning_rate": 9.799678956121976e-09, + "logits/chosen": -1.4374769926071167, + "logits/rejected": -1.4209473133087158, + "logps/chosen": -45.876285552978516, + "logps/rejected": -48.4627799987793, + "loss": 0.6872, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0018981884932145476, + "rewards/margins": 0.012246804311871529, + "rewards/rejected": -0.01414499245584011, + "step": 3070 + }, + { + "epoch": 2.2190201729106627, + "grad_norm": 3.5483055114746094, + "learning_rate": 9.633802313433314e-09, + "logits/chosen": -1.4146339893341064, + "logits/rejected": -1.4106262922286987, + "logps/chosen": -48.35334014892578, + "logps/rejected": -50.88026809692383, + "loss": 0.6853, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.0011129272170364857, + "rewards/margins": 0.016139192506670952, + "rewards/rejected": -0.017252117395401, + "step": 3080 + }, + { + "epoch": 2.2262247838616713, + "grad_norm": 2.770353317260742, + "learning_rate": 9.469005850991705e-09, + "logits/chosen": -1.4854236841201782, + "logits/rejected": -1.4725892543792725, + "logps/chosen": -47.15768051147461, + "logps/rejected": -48.600120544433594, + "loss": 0.6845, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.0014169791247695684, + "rewards/margins": 0.01794120855629444, + "rewards/rejected": -0.019358184188604355, + "step": 3090 + }, + { + "epoch": 2.23342939481268, + "grad_norm": 3.1680026054382324, + "learning_rate": 9.305301153307949e-09, + "logits/chosen": -1.4969298839569092, + "logits/rejected": -1.5010452270507812, + "logps/chosen": -39.92610549926758, + "logps/rejected": -43.93165969848633, + "loss": 0.6835, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.004226221702992916, + "rewards/margins": 0.019832659512758255, + "rewards/rejected": -0.024058884009718895, + "step": 3100 + }, + { + "epoch": 2.2406340057636887, + "grad_norm": 2.740999937057495, + "learning_rate": 9.142699728146336e-09, + "logits/chosen": -1.4348478317260742, + "logits/rejected": -1.426283597946167, + "logps/chosen": -46.04469299316406, + "logps/rejected": -50.985870361328125, + "loss": 0.6845, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.00199733953922987, + "rewards/margins": 0.01793844625353813, + "rewards/rejected": -0.019935782998800278, + "step": 3110 + }, + { + "epoch": 2.2478386167146973, + "grad_norm": 2.9771533012390137, + "learning_rate": 8.981213005715627e-09, + "logits/chosen": -1.5030255317687988, + "logits/rejected": -1.5023804903030396, + "logps/chosen": -44.155548095703125, + "logps/rejected": -48.9468994140625, + "loss": 0.6844, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0017275598365813494, + "rewards/margins": 0.017975138500332832, + "rewards/rejected": -0.019702699035406113, + "step": 3120 + }, + { + "epoch": 2.255043227665706, + "grad_norm": 3.6485941410064697, + "learning_rate": 8.820852337865611e-09, + "logits/chosen": -1.5533548593521118, + "logits/rejected": -1.537881851196289, + "logps/chosen": -45.01945114135742, + "logps/rejected": -48.55274200439453, + "loss": 0.6847, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.000804073759354651, + "rewards/margins": 0.01763494312763214, + "rewards/rejected": -0.018439019098877907, + "step": 3130 + }, + { + "epoch": 2.2622478386167146, + "grad_norm": 2.8052473068237305, + "learning_rate": 8.661628997289044e-09, + "logits/chosen": -1.4350953102111816, + "logits/rejected": -1.4218308925628662, + "logps/chosen": -45.344215393066406, + "logps/rejected": -49.77771759033203, + "loss": 0.6842, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.000743370212148875, + "rewards/margins": 0.01851017400622368, + "rewards/rejected": -0.019253544509410858, + "step": 3140 + }, + { + "epoch": 2.2694524495677233, + "grad_norm": 2.799929141998291, + "learning_rate": 8.503554176729341e-09, + "logits/chosen": -1.4219027757644653, + "logits/rejected": -1.4167585372924805, + "logps/chosen": -45.40351104736328, + "logps/rejected": -48.951255798339844, + "loss": 0.6836, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": 0.0013620567042380571, + "rewards/margins": 0.019856946542859077, + "rewards/rejected": -0.018494892865419388, + "step": 3150 + }, + { + "epoch": 2.276657060518732, + "grad_norm": 3.7959582805633545, + "learning_rate": 8.346638988193636e-09, + "logits/chosen": -1.4739030599594116, + "logits/rejected": -1.4716893434524536, + "logps/chosen": -40.54225158691406, + "logps/rejected": -46.273109436035156, + "loss": 0.6838, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.0006361313280649483, + "rewards/margins": 0.019271841272711754, + "rewards/rejected": -0.01990797556936741, + "step": 3160 + }, + { + "epoch": 2.2838616714697406, + "grad_norm": 4.178712368011475, + "learning_rate": 8.19089446217176e-09, + "logits/chosen": -1.4299513101577759, + "logits/rejected": -1.4083366394042969, + "logps/chosen": -45.69487380981445, + "logps/rejected": -51.08367919921875, + "loss": 0.68, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.002962021389976144, + "rewards/margins": 0.027211204171180725, + "rewards/rejected": -0.024249184876680374, + "step": 3170 + }, + { + "epoch": 2.2910662824207493, + "grad_norm": 3.0471279621124268, + "learning_rate": 8.036331546860777e-09, + "logits/chosen": -1.457237958908081, + "logits/rejected": -1.45479154586792, + "logps/chosen": -45.34724044799805, + "logps/rejected": -48.17335891723633, + "loss": 0.6876, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.0030756318010389805, + "rewards/margins": 0.011720901355147362, + "rewards/rejected": -0.014796535484492779, + "step": 3180 + }, + { + "epoch": 2.298270893371758, + "grad_norm": 3.6594536304473877, + "learning_rate": 7.882961107395416e-09, + "logits/chosen": -1.4986907243728638, + "logits/rejected": -1.4889423847198486, + "logps/chosen": -52.23211669921875, + "logps/rejected": -52.596961975097656, + "loss": 0.687, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0050607905723154545, + "rewards/margins": 0.01296902447938919, + "rewards/rejected": -0.01802981272339821, + "step": 3190 + }, + { + "epoch": 2.3054755043227666, + "grad_norm": 4.586586952209473, + "learning_rate": 7.73079392508428e-09, + "logits/chosen": -1.4226272106170654, + "logits/rejected": -1.4266163110733032, + "logps/chosen": -49.690284729003906, + "logps/rejected": -56.577552795410156, + "loss": 0.6827, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0031807306222617626, + "rewards/margins": 0.021851547062397003, + "rewards/rejected": -0.025032276287674904, + "step": 3200 + }, + { + "epoch": 2.3126801152737753, + "grad_norm": 3.5938634872436523, + "learning_rate": 7.579840696651938e-09, + "logits/chosen": -1.5148307085037231, + "logits/rejected": -1.5085079669952393, + "logps/chosen": -42.24188232421875, + "logps/rejected": -45.60350799560547, + "loss": 0.6843, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.003505673259496689, + "rewards/margins": 0.0183546245098114, + "rewards/rejected": -0.02186029590666294, + "step": 3210 + }, + { + "epoch": 2.319884726224784, + "grad_norm": 4.208233833312988, + "learning_rate": 7.43011203348704e-09, + "logits/chosen": -1.358338475227356, + "logits/rejected": -1.353421926498413, + "logps/chosen": -53.043052673339844, + "logps/rejected": -53.77092361450195, + "loss": 0.6859, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0048605212941765785, + "rewards/margins": 0.015131947584450245, + "rewards/rejected": -0.019992467015981674, + "step": 3220 + }, + { + "epoch": 2.3270893371757926, + "grad_norm": 3.2215311527252197, + "learning_rate": 7.281618460896344e-09, + "logits/chosen": -1.4850248098373413, + "logits/rejected": -1.4745427370071411, + "logps/chosen": -46.2344970703125, + "logps/rejected": -50.65446472167969, + "loss": 0.6847, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.0013599084923043847, + "rewards/margins": 0.017538536339998245, + "rewards/rejected": -0.01889844611287117, + "step": 3230 + }, + { + "epoch": 2.3342939481268012, + "grad_norm": 2.962292194366455, + "learning_rate": 7.134370417364849e-09, + "logits/chosen": -1.432776689529419, + "logits/rejected": -1.4246224164962769, + "logps/chosen": -45.164649963378906, + "logps/rejected": -47.972198486328125, + "loss": 0.6868, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.006024752743542194, + "rewards/margins": 0.013254925608634949, + "rewards/rejected": -0.019279679283499718, + "step": 3240 + }, + { + "epoch": 2.34149855907781, + "grad_norm": 4.003485202789307, + "learning_rate": 6.988378253821981e-09, + "logits/chosen": -1.4599487781524658, + "logits/rejected": -1.4520995616912842, + "logps/chosen": -51.3668098449707, + "logps/rejected": -54.87144088745117, + "loss": 0.6869, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00047857934259809554, + "rewards/margins": 0.013025891967117786, + "rewards/rejected": -0.012547312304377556, + "step": 3250 + }, + { + "epoch": 2.3487031700288186, + "grad_norm": 3.05368971824646, + "learning_rate": 6.8436522329140186e-09, + "logits/chosen": -1.4416824579238892, + "logits/rejected": -1.4479385614395142, + "logps/chosen": -46.932762145996094, + "logps/rejected": -50.70888900756836, + "loss": 0.6856, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.0023715035058557987, + "rewards/margins": 0.01593146100640297, + "rewards/rejected": -0.01830296590924263, + "step": 3260 + }, + { + "epoch": 2.3559077809798272, + "grad_norm": 3.5134246349334717, + "learning_rate": 6.700202528282603e-09, + "logits/chosen": -1.4186961650848389, + "logits/rejected": -1.399042010307312, + "logps/chosen": -48.512840270996094, + "logps/rejected": -51.45924758911133, + "loss": 0.6837, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.005258539691567421, + "rewards/margins": 0.01997128315269947, + "rewards/rejected": -0.02522982284426689, + "step": 3270 + }, + { + "epoch": 2.363112391930836, + "grad_norm": 3.723384141921997, + "learning_rate": 6.558039223849668e-09, + "logits/chosen": -1.5115994215011597, + "logits/rejected": -1.492408037185669, + "logps/chosen": -46.227333068847656, + "logps/rejected": -52.55454635620117, + "loss": 0.6819, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.0013681629206985235, + "rewards/margins": 0.023501722142100334, + "rewards/rejected": -0.0248698852956295, + "step": 3280 + }, + { + "epoch": 2.3703170028818445, + "grad_norm": 2.8809094429016113, + "learning_rate": 6.417172313108471e-09, + "logits/chosen": -1.4260962009429932, + "logits/rejected": -1.4144647121429443, + "logps/chosen": -44.0165901184082, + "logps/rejected": -47.23214340209961, + "loss": 0.6853, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.00685364892706275, + "rewards/margins": 0.016492731869220734, + "rewards/rejected": -0.02334637939929962, + "step": 3290 + }, + { + "epoch": 2.377521613832853, + "grad_norm": 2.958158254623413, + "learning_rate": 6.277611698421179e-09, + "logits/chosen": -1.5557560920715332, + "logits/rejected": -1.5354154109954834, + "logps/chosen": -39.024757385253906, + "logps/rejected": -44.89704513549805, + "loss": 0.6816, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.003160464344546199, + "rewards/margins": 0.02409496158361435, + "rewards/rejected": -0.027255425229668617, + "step": 3300 + }, + { + "epoch": 2.3847262247838614, + "grad_norm": 4.754333972930908, + "learning_rate": 6.139367190322714e-09, + "logits/chosen": -1.4941930770874023, + "logits/rejected": -1.494152307510376, + "logps/chosen": -52.68627166748047, + "logps/rejected": -58.06211471557617, + "loss": 0.6857, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.003172674449160695, + "rewards/margins": 0.015608375892043114, + "rewards/rejected": -0.01878105290234089, + "step": 3310 + }, + { + "epoch": 2.39193083573487, + "grad_norm": 2.502631902694702, + "learning_rate": 6.002448506831171e-09, + "logits/chosen": -1.4809995889663696, + "logits/rejected": -1.4765173196792603, + "logps/chosen": -44.01865005493164, + "logps/rejected": -49.128318786621094, + "loss": 0.6847, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0028991461731493473, + "rewards/margins": 0.017662644386291504, + "rewards/rejected": -0.020561790093779564, + "step": 3320 + }, + { + "epoch": 2.3991354466858787, + "grad_norm": 3.0286099910736084, + "learning_rate": 5.866865272764607e-09, + "logits/chosen": -1.4967315196990967, + "logits/rejected": -1.490002989768982, + "logps/chosen": -46.34364318847656, + "logps/rejected": -50.384918212890625, + "loss": 0.6856, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.005602546967566013, + "rewards/margins": 0.015632400289177895, + "rewards/rejected": -0.021234950050711632, + "step": 3330 + }, + { + "epoch": 2.4063400576368874, + "grad_norm": 4.623770713806152, + "learning_rate": 5.7326270190645595e-09, + "logits/chosen": -1.3300215005874634, + "logits/rejected": -1.3248698711395264, + "logps/chosen": -49.87703323364258, + "logps/rejected": -51.88032913208008, + "loss": 0.6849, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.00470289122313261, + "rewards/margins": 0.017181994393467903, + "rewards/rejected": -0.02188488282263279, + "step": 3340 + }, + { + "epoch": 2.413544668587896, + "grad_norm": 3.6492199897766113, + "learning_rate": 5.599743182125938e-09, + "logits/chosen": -1.539113163948059, + "logits/rejected": -1.5383161306381226, + "logps/chosen": -48.7152214050293, + "logps/rejected": -54.0184326171875, + "loss": 0.685, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0006119796307757497, + "rewards/margins": 0.01680837571620941, + "rewards/rejected": -0.01742035523056984, + "step": 3350 + }, + { + "epoch": 2.4207492795389047, + "grad_norm": 3.46722412109375, + "learning_rate": 5.46822310313379e-09, + "logits/chosen": -1.5624011754989624, + "logits/rejected": -1.5669426918029785, + "logps/chosen": -49.3877067565918, + "logps/rejected": -52.6993408203125, + "loss": 0.6876, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.00427304208278656, + "rewards/margins": 0.011617867276072502, + "rewards/rejected": -0.015890907496213913, + "step": 3360 + }, + { + "epoch": 2.4279538904899134, + "grad_norm": 3.6487491130828857, + "learning_rate": 5.33807602740658e-09, + "logits/chosen": -1.557901382446289, + "logits/rejected": -1.5423341989517212, + "logps/chosen": -41.81071853637695, + "logps/rejected": -47.327213287353516, + "loss": 0.6803, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.0005742600187659264, + "rewards/margins": 0.026613151654601097, + "rewards/rejected": -0.027187416329979897, + "step": 3370 + }, + { + "epoch": 2.435158501440922, + "grad_norm": 3.7982168197631836, + "learning_rate": 5.209311103746334e-09, + "logits/chosen": -1.476545810699463, + "logits/rejected": -1.4724234342575073, + "logps/chosen": -47.061851501464844, + "logps/rejected": -52.321556091308594, + "loss": 0.684, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0026500283274799585, + "rewards/margins": 0.01901710033416748, + "rewards/rejected": -0.021667128428816795, + "step": 3380 + }, + { + "epoch": 2.4423631123919307, + "grad_norm": 4.152634620666504, + "learning_rate": 5.081937383795484e-09, + "logits/chosen": -1.4653098583221436, + "logits/rejected": -1.4548850059509277, + "logps/chosen": -44.15032958984375, + "logps/rejected": -48.76853942871094, + "loss": 0.6824, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0003869136853609234, + "rewards/margins": 0.022202041000127792, + "rewards/rejected": -0.022588953375816345, + "step": 3390 + }, + { + "epoch": 2.4495677233429394, + "grad_norm": 3.6726996898651123, + "learning_rate": 4.955963821400599e-09, + "logits/chosen": -1.5260181427001953, + "logits/rejected": -1.5078346729278564, + "logps/chosen": -46.817447662353516, + "logps/rejected": -49.603084564208984, + "loss": 0.683, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0016360729932785034, + "rewards/margins": 0.021213185042142868, + "rewards/rejected": -0.022849254310131073, + "step": 3400 + }, + { + "epoch": 2.456772334293948, + "grad_norm": 2.7636361122131348, + "learning_rate": 4.831399271982928e-09, + "logits/chosen": -1.3976712226867676, + "logits/rejected": -1.3814750909805298, + "logps/chosen": -49.63505172729492, + "logps/rejected": -52.69793701171875, + "loss": 0.6833, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.0008561603026464581, + "rewards/margins": 0.02064884826540947, + "rewards/rejected": -0.02150500938296318, + "step": 3410 + }, + { + "epoch": 2.4639769452449567, + "grad_norm": 3.9705750942230225, + "learning_rate": 4.708252491915951e-09, + "logits/chosen": -1.5011204481124878, + "logits/rejected": -1.4910115003585815, + "logps/chosen": -47.02643585205078, + "logps/rejected": -51.54557418823242, + "loss": 0.6833, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0034367390908300877, + "rewards/margins": 0.0207851342856884, + "rewards/rejected": -0.02422187477350235, + "step": 3420 + }, + { + "epoch": 2.4711815561959654, + "grad_norm": 2.888221263885498, + "learning_rate": 4.58653213790981e-09, + "logits/chosen": -1.4987213611602783, + "logits/rejected": -1.4801981449127197, + "logps/chosen": -47.3719596862793, + "logps/rejected": -51.944847106933594, + "loss": 0.684, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0016913721337914467, + "rewards/margins": 0.019097764045000076, + "rewards/rejected": -0.020789138972759247, + "step": 3430 + }, + { + "epoch": 2.478386167146974, + "grad_norm": 3.370331287384033, + "learning_rate": 4.466246766402773e-09, + "logits/chosen": -1.4718296527862549, + "logits/rejected": -1.4525142908096313, + "logps/chosen": -48.6312141418457, + "logps/rejected": -52.31147384643555, + "loss": 0.6821, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.0008567210170440376, + "rewards/margins": 0.022917402908205986, + "rewards/rejected": -0.023774122819304466, + "step": 3440 + }, + { + "epoch": 2.4855907780979827, + "grad_norm": 3.688567638397217, + "learning_rate": 4.347404832959775e-09, + "logits/chosen": -1.5265666246414185, + "logits/rejected": -1.515133261680603, + "logps/chosen": -44.58491516113281, + "logps/rejected": -48.72705841064453, + "loss": 0.6843, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.00400127936154604, + "rewards/margins": 0.018310952931642532, + "rewards/rejected": -0.022312233224511147, + "step": 3450 + }, + { + "epoch": 2.4927953890489913, + "grad_norm": 3.47495436668396, + "learning_rate": 4.230014691678016e-09, + "logits/chosen": -1.4787131547927856, + "logits/rejected": -1.4799821376800537, + "logps/chosen": -49.35828399658203, + "logps/rejected": -51.017032623291016, + "loss": 0.6868, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.005290646571666002, + "rewards/margins": 0.013366172090172768, + "rewards/rejected": -0.018656820058822632, + "step": 3460 + }, + { + "epoch": 2.5, + "grad_norm": 3.119842529296875, + "learning_rate": 4.114084594599707e-09, + "logits/chosen": -1.4643129110336304, + "logits/rejected": -1.4411898851394653, + "logps/chosen": -45.54496383666992, + "logps/rejected": -51.423011779785156, + "loss": 0.6825, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.0023778839968144894, + "rewards/margins": 0.022280005738139153, + "rewards/rejected": -0.02465788833796978, + "step": 3470 + }, + { + "epoch": 2.5072046109510087, + "grad_norm": 3.103360176086426, + "learning_rate": 3.9996226911319546e-09, + "logits/chosen": -1.481475830078125, + "logits/rejected": -1.4590504169464111, + "logps/chosen": -45.547027587890625, + "logps/rejected": -48.61585235595703, + "loss": 0.684, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.002360361162573099, + "rewards/margins": 0.01899893209338188, + "rewards/rejected": -0.021359292790293694, + "step": 3480 + }, + { + "epoch": 2.5144092219020173, + "grad_norm": 3.3628768920898438, + "learning_rate": 3.886637027473949e-09, + "logits/chosen": -1.5132756233215332, + "logits/rejected": -1.5091888904571533, + "logps/chosen": -47.463836669921875, + "logps/rejected": -51.489356994628906, + "loss": 0.6839, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.004199695773422718, + "rewards/margins": 0.018925204873085022, + "rewards/rejected": -0.023124899715185165, + "step": 3490 + }, + { + "epoch": 2.521613832853026, + "grad_norm": 3.1813926696777344, + "learning_rate": 3.775135546051295e-09, + "logits/chosen": -1.4061282873153687, + "logits/rejected": -1.4064362049102783, + "logps/chosen": -45.96582794189453, + "logps/rejected": -50.36768341064453, + "loss": 0.6823, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.0037177815102040768, + "rewards/margins": 0.022428225725889206, + "rewards/rejected": -0.02614600956439972, + "step": 3500 + }, + { + "epoch": 2.5288184438040346, + "grad_norm": 3.360513687133789, + "learning_rate": 3.665126084957723e-09, + "logits/chosen": -1.4706053733825684, + "logits/rejected": -1.4622557163238525, + "logps/chosen": -50.74425506591797, + "logps/rejected": -51.06829071044922, + "loss": 0.6845, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.004353958647698164, + "rewards/margins": 0.018070969730615616, + "rewards/rejected": -0.022424932569265366, + "step": 3510 + }, + { + "epoch": 2.5360230547550433, + "grad_norm": 3.123054265975952, + "learning_rate": 3.556616377404101e-09, + "logits/chosen": -1.5037996768951416, + "logits/rejected": -1.4918115139007568, + "logps/chosen": -51.80335235595703, + "logps/rejected": -55.71653366088867, + "loss": 0.6822, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.006135745905339718, + "rewards/margins": 0.022644102573394775, + "rewards/rejected": -0.028779853135347366, + "step": 3520 + }, + { + "epoch": 2.543227665706052, + "grad_norm": 3.3846046924591064, + "learning_rate": 3.4496140511748125e-09, + "logits/chosen": -1.4872541427612305, + "logits/rejected": -1.468563437461853, + "logps/chosen": -48.045196533203125, + "logps/rejected": -50.846946716308594, + "loss": 0.6838, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.006296695210039616, + "rewards/margins": 0.019439449533820152, + "rewards/rejected": -0.025736143812537193, + "step": 3530 + }, + { + "epoch": 2.5504322766570606, + "grad_norm": 3.930396318435669, + "learning_rate": 3.3441266280915427e-09, + "logits/chosen": -1.4504344463348389, + "logits/rejected": -1.447840929031372, + "logps/chosen": -53.62995147705078, + "logps/rejected": -57.090415954589844, + "loss": 0.6855, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.001316823298111558, + "rewards/margins": 0.01597544364631176, + "rewards/rejected": -0.017292268574237823, + "step": 3540 + }, + { + "epoch": 2.5576368876080693, + "grad_norm": 3.4803848266601562, + "learning_rate": 3.2401615234845693e-09, + "logits/chosen": -1.4945684671401978, + "logits/rejected": -1.4767378568649292, + "logps/chosen": -53.949989318847656, + "logps/rejected": -57.206214904785156, + "loss": 0.6823, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.006395784206688404, + "rewards/margins": 0.022547531872987747, + "rewards/rejected": -0.028943315148353577, + "step": 3550 + }, + { + "epoch": 2.564841498559078, + "grad_norm": 3.0999386310577393, + "learning_rate": 3.1377260456714375e-09, + "logits/chosen": -1.3246692419052124, + "logits/rejected": -1.3125396966934204, + "logps/chosen": -48.97364807128906, + "logps/rejected": -53.9735221862793, + "loss": 0.6838, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.006135467439889908, + "rewards/margins": 0.019646648317575455, + "rewards/rejected": -0.025782117620110512, + "step": 3560 + }, + { + "epoch": 2.5720461095100866, + "grad_norm": 3.6545066833496094, + "learning_rate": 3.0368273954432698e-09, + "logits/chosen": -1.5312678813934326, + "logits/rejected": -1.5034937858581543, + "logps/chosen": -51.01383590698242, + "logps/rejected": -53.1749153137207, + "loss": 0.6848, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.005504480563104153, + "rewards/margins": 0.017458677291870117, + "rewards/rejected": -0.022963156923651695, + "step": 3570 + }, + { + "epoch": 2.5792507204610953, + "grad_norm": 2.9861600399017334, + "learning_rate": 2.937472665558541e-09, + "logits/chosen": -1.5554245710372925, + "logits/rejected": -1.548001766204834, + "logps/chosen": -45.340965270996094, + "logps/rejected": -47.545833587646484, + "loss": 0.6821, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.007226808462291956, + "rewards/margins": 0.02288452349603176, + "rewards/rejected": -0.030111337080597878, + "step": 3580 + }, + { + "epoch": 2.586455331412104, + "grad_norm": 4.0210347175598145, + "learning_rate": 2.8396688402445053e-09, + "logits/chosen": -1.5757755041122437, + "logits/rejected": -1.5586358308792114, + "logps/chosen": -45.34209060668945, + "logps/rejected": -51.52289581298828, + "loss": 0.6819, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.009358217939734459, + "rewards/margins": 0.023509806022047997, + "rewards/rejected": -0.032868027687072754, + "step": 3590 + }, + { + "epoch": 2.5936599423631126, + "grad_norm": 4.072700500488281, + "learning_rate": 2.7434227947062324e-09, + "logits/chosen": -1.5280077457427979, + "logits/rejected": -1.5168155431747437, + "logps/chosen": -53.77875518798828, + "logps/rejected": -57.271522521972656, + "loss": 0.6859, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.004612335003912449, + "rewards/margins": 0.015178831294178963, + "rewards/rejected": -0.019791167229413986, + "step": 3600 + }, + { + "epoch": 2.6008645533141213, + "grad_norm": 3.0110554695129395, + "learning_rate": 2.6487412946432976e-09, + "logits/chosen": -1.4473832845687866, + "logits/rejected": -1.4343912601470947, + "logps/chosen": -49.441104888916016, + "logps/rejected": -52.14855194091797, + "loss": 0.6827, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.011908985674381256, + "rewards/margins": 0.02195003628730774, + "rewards/rejected": -0.033859021961688995, + "step": 3610 + }, + { + "epoch": 2.60806916426513, + "grad_norm": 3.392671823501587, + "learning_rate": 2.5556309957742024e-09, + "logits/chosen": -1.445562481880188, + "logits/rejected": -1.4361542463302612, + "logps/chosen": -44.897274017333984, + "logps/rejected": -51.98448944091797, + "loss": 0.6809, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.002028597518801689, + "rewards/margins": 0.025439077988266945, + "rewards/rejected": -0.023410480469465256, + "step": 3620 + }, + { + "epoch": 2.6152737752161386, + "grad_norm": 3.324449300765991, + "learning_rate": 2.4640984433684758e-09, + "logits/chosen": -1.559381365776062, + "logits/rejected": -1.5451711416244507, + "logps/chosen": -50.890586853027344, + "logps/rejected": -53.00829315185547, + "loss": 0.6841, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.0038901835214346647, + "rewards/margins": 0.01893722079694271, + "rewards/rejected": -0.022827405482530594, + "step": 3630 + }, + { + "epoch": 2.6224783861671472, + "grad_norm": 3.6242074966430664, + "learning_rate": 2.3741500717865987e-09, + "logits/chosen": -1.4463894367218018, + "logits/rejected": -1.45791757106781, + "logps/chosen": -47.297630310058594, + "logps/rejected": -52.06903839111328, + "loss": 0.6844, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.0010312128579244018, + "rewards/margins": 0.018220648169517517, + "rewards/rejected": -0.019251862540841103, + "step": 3640 + }, + { + "epoch": 2.629682997118156, + "grad_norm": 3.100571632385254, + "learning_rate": 2.285792204027678e-09, + "logits/chosen": -1.422215223312378, + "logits/rejected": -1.4117518663406372, + "logps/chosen": -47.42947006225586, + "logps/rejected": -54.567771911621094, + "loss": 0.6826, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.004024113062769175, + "rewards/margins": 0.021751539781689644, + "rewards/rejected": -0.02577565237879753, + "step": 3650 + }, + { + "epoch": 2.636887608069164, + "grad_norm": 3.6712942123413086, + "learning_rate": 2.199031051284972e-09, + "logits/chosen": -1.5009148120880127, + "logits/rejected": -1.500221610069275, + "logps/chosen": -48.28940963745117, + "logps/rejected": -52.26566696166992, + "loss": 0.684, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.00409685168415308, + "rewards/margins": 0.01923191547393799, + "rewards/rejected": -0.023328769952058792, + "step": 3660 + }, + { + "epoch": 2.6440922190201728, + "grad_norm": 3.7881343364715576, + "learning_rate": 2.113872712509254e-09, + "logits/chosen": -1.408132791519165, + "logits/rejected": -1.3988707065582275, + "logps/chosen": -56.1018180847168, + "logps/rejected": -59.201927185058594, + "loss": 0.6834, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.00847399141639471, + "rewards/margins": 0.02007477544248104, + "rewards/rejected": -0.028548765927553177, + "step": 3670 + }, + { + "epoch": 2.6512968299711814, + "grad_norm": 3.475985050201416, + "learning_rate": 2.0303231739801143e-09, + "logits/chosen": -1.411527156829834, + "logits/rejected": -1.398315668106079, + "logps/chosen": -50.648380279541016, + "logps/rejected": -54.86267852783203, + "loss": 0.6846, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.008079654537141323, + "rewards/margins": 0.01782270334661007, + "rewards/rejected": -0.025902356952428818, + "step": 3680 + }, + { + "epoch": 2.65850144092219, + "grad_norm": 3.8976125717163086, + "learning_rate": 1.948388308885102e-09, + "logits/chosen": -1.5758289098739624, + "logits/rejected": -1.5607655048370361, + "logps/chosen": -50.037288665771484, + "logps/rejected": -52.93767547607422, + "loss": 0.6852, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0028561349026858807, + "rewards/margins": 0.01672014407813549, + "rewards/rejected": -0.01957627758383751, + "step": 3690 + }, + { + "epoch": 2.6657060518731988, + "grad_norm": 3.1043570041656494, + "learning_rate": 1.86807387690692e-09, + "logits/chosen": -1.5538166761398315, + "logits/rejected": -1.546657919883728, + "logps/chosen": -50.15034866333008, + "logps/rejected": -57.51653289794922, + "loss": 0.6792, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.0009024621685966849, + "rewards/margins": 0.029041822999715805, + "rewards/rejected": -0.029944289475679398, + "step": 3700 + }, + { + "epoch": 2.6729106628242074, + "grad_norm": 3.4918878078460693, + "learning_rate": 1.789385523818493e-09, + "logits/chosen": -1.477716326713562, + "logits/rejected": -1.4800388813018799, + "logps/chosen": -45.15003204345703, + "logps/rejected": -51.051692962646484, + "loss": 0.6825, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.002582186833024025, + "rewards/margins": 0.021981868892908096, + "rewards/rejected": -0.02456405758857727, + "step": 3710 + }, + { + "epoch": 2.680115273775216, + "grad_norm": 3.5743157863616943, + "learning_rate": 1.712328781086131e-09, + "logits/chosen": -1.5494495630264282, + "logits/rejected": -1.5334922075271606, + "logps/chosen": -50.91818618774414, + "logps/rejected": -53.10481643676758, + "loss": 0.6865, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.006211831234395504, + "rewards/margins": 0.013929562643170357, + "rewards/rejected": -0.020141394808888435, + "step": 3720 + }, + { + "epoch": 2.6873198847262247, + "grad_norm": 3.3937318325042725, + "learning_rate": 1.6369090654806543e-09, + "logits/chosen": -1.5740840435028076, + "logits/rejected": -1.5617650747299194, + "logps/chosen": -46.77784729003906, + "logps/rejected": -51.572731018066406, + "loss": 0.6847, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.007398572750389576, + "rewards/margins": 0.017527595162391663, + "rewards/rejected": -0.024926166981458664, + "step": 3730 + }, + { + "epoch": 2.6945244956772334, + "grad_norm": 3.194082498550415, + "learning_rate": 1.5631316786966498e-09, + "logits/chosen": -1.4844461679458618, + "logits/rejected": -1.4684514999389648, + "logps/chosen": -45.005489349365234, + "logps/rejected": -48.377532958984375, + "loss": 0.6851, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.005601891782134771, + "rewards/margins": 0.016944076865911484, + "rewards/rejected": -0.022545967251062393, + "step": 3740 + }, + { + "epoch": 2.701729106628242, + "grad_norm": 4.1264753341674805, + "learning_rate": 1.491001806979772e-09, + "logits/chosen": -1.5148539543151855, + "logits/rejected": -1.5002485513687134, + "logps/chosen": -50.1202507019043, + "logps/rejected": -54.22260665893555, + "loss": 0.6842, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.0015927895437926054, + "rewards/margins": 0.018657799810171127, + "rewards/rejected": -0.0202505923807621, + "step": 3750 + }, + { + "epoch": 2.7089337175792507, + "grad_norm": 3.740504264831543, + "learning_rate": 1.4205245207621508e-09, + "logits/chosen": -1.4369869232177734, + "logits/rejected": -1.421118974685669, + "logps/chosen": -52.8070068359375, + "logps/rejected": -55.55951690673828, + "loss": 0.6823, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.0012057258281856775, + "rewards/margins": 0.022528300061821938, + "rewards/rejected": -0.023734021931886673, + "step": 3760 + }, + { + "epoch": 2.7161383285302594, + "grad_norm": 3.869483232498169, + "learning_rate": 1.3517047743059978e-09, + "logits/chosen": -1.520595908164978, + "logits/rejected": -1.5225101709365845, + "logps/chosen": -49.41727066040039, + "logps/rejected": -55.34285354614258, + "loss": 0.6838, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.004268472082912922, + "rewards/margins": 0.019223999232053757, + "rewards/rejected": -0.023492468520998955, + "step": 3770 + }, + { + "epoch": 2.723342939481268, + "grad_norm": 3.2750725746154785, + "learning_rate": 1.2845474053553156e-09, + "logits/chosen": -1.5188109874725342, + "logits/rejected": -1.5104305744171143, + "logps/chosen": -43.41191101074219, + "logps/rejected": -46.98235321044922, + "loss": 0.6851, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.006257681641727686, + "rewards/margins": 0.01682373322546482, + "rewards/rejected": -0.02308141626417637, + "step": 3780 + }, + { + "epoch": 2.7305475504322767, + "grad_norm": 2.8072025775909424, + "learning_rate": 1.2190571347958422e-09, + "logits/chosen": -1.5447680950164795, + "logits/rejected": -1.5476669073104858, + "logps/chosen": -43.22075653076172, + "logps/rejected": -49.99506759643555, + "loss": 0.6846, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": 0.0003701849782373756, + "rewards/margins": 0.01762227527797222, + "rewards/rejected": -0.017252091318368912, + "step": 3790 + }, + { + "epoch": 2.7377521613832854, + "grad_norm": 2.919499635696411, + "learning_rate": 1.1552385663231634e-09, + "logits/chosen": -1.4809738397598267, + "logits/rejected": -1.4594279527664185, + "logps/chosen": -48.0673942565918, + "logps/rejected": -49.96904373168945, + "loss": 0.6853, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.004101811908185482, + "rewards/margins": 0.016451913863420486, + "rewards/rejected": -0.020553726702928543, + "step": 3800 + }, + { + "epoch": 2.744956772334294, + "grad_norm": 3.0895450115203857, + "learning_rate": 1.0930961861191302e-09, + "logits/chosen": -1.4430171251296997, + "logits/rejected": -1.44236159324646, + "logps/chosen": -46.34189224243164, + "logps/rejected": -49.78446578979492, + "loss": 0.6867, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.006359028164297342, + "rewards/margins": 0.01367940753698349, + "rewards/rejected": -0.020038435235619545, + "step": 3810 + }, + { + "epoch": 2.7521613832853027, + "grad_norm": 3.008639335632324, + "learning_rate": 1.0326343625364608e-09, + "logits/chosen": -1.4366722106933594, + "logits/rejected": -1.4210902452468872, + "logps/chosen": -46.98648452758789, + "logps/rejected": -52.36387252807617, + "loss": 0.6811, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.004181761294603348, + "rewards/margins": 0.02516576275229454, + "rewards/rejected": -0.029347527772188187, + "step": 3820 + }, + { + "epoch": 2.7593659942363113, + "grad_norm": 2.6366994380950928, + "learning_rate": 9.738573457917066e-10, + "logits/chosen": -1.5501086711883545, + "logits/rejected": -1.544032335281372, + "logps/chosen": -41.07685089111328, + "logps/rejected": -47.10578918457031, + "loss": 0.6828, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.004288278520107269, + "rewards/margins": 0.02141541615128517, + "rewards/rejected": -0.02570369280874729, + "step": 3830 + }, + { + "epoch": 2.76657060518732, + "grad_norm": 2.9577691555023193, + "learning_rate": 9.16769267666434e-10, + "logits/chosen": -1.4689620733261108, + "logits/rejected": -1.4628558158874512, + "logps/chosen": -46.15149688720703, + "logps/rejected": -48.130184173583984, + "loss": 0.6889, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.00523525383323431, + "rewards/margins": 0.00903787650167942, + "rewards/rejected": -0.01427313219755888, + "step": 3840 + }, + { + "epoch": 2.7737752161383287, + "grad_norm": 3.2562596797943115, + "learning_rate": 8.613741412168113e-10, + "logits/chosen": -1.4877763986587524, + "logits/rejected": -1.4824540615081787, + "logps/chosen": -54.22200393676758, + "logps/rejected": -58.35784149169922, + "loss": 0.6834, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0029452387243509293, + "rewards/margins": 0.020176690071821213, + "rewards/rejected": -0.02312193065881729, + "step": 3850 + }, + { + "epoch": 2.7809798270893373, + "grad_norm": 3.3105201721191406, + "learning_rate": 8.076758604914802e-10, + "logits/chosen": -1.4479657411575317, + "logits/rejected": -1.4352147579193115, + "logps/chosen": -43.090030670166016, + "logps/rejected": -46.575626373291016, + "loss": 0.6847, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.0015796951483935118, + "rewards/margins": 0.01767609640955925, + "rewards/rejected": -0.01925579458475113, + "step": 3860 + }, + { + "epoch": 2.7881844380403455, + "grad_norm": 4.648325443267822, + "learning_rate": 7.55678200257856e-10, + "logits/chosen": -1.4449894428253174, + "logits/rejected": -1.4322887659072876, + "logps/chosen": -49.959930419921875, + "logps/rejected": -55.334449768066406, + "loss": 0.683, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.005742833949625492, + "rewards/margins": 0.021091628819704056, + "rewards/rejected": -0.026834463700652122, + "step": 3870 + }, + { + "epoch": 2.795389048991354, + "grad_norm": 3.2336835861206055, + "learning_rate": 7.053848157367315e-10, + "logits/chosen": -1.467712640762329, + "logits/rejected": -1.4539921283721924, + "logps/chosen": -48.0994987487793, + "logps/rejected": -53.06573486328125, + "loss": 0.683, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.0008336772443726659, + "rewards/margins": 0.020950373262166977, + "rewards/rejected": -0.02178405039012432, + "step": 3880 + }, + { + "epoch": 2.802593659942363, + "grad_norm": 2.563777208328247, + "learning_rate": 6.567992423453794e-10, + "logits/chosen": -1.4956997632980347, + "logits/rejected": -1.4893451929092407, + "logps/chosen": -43.29277038574219, + "logps/rejected": -46.525360107421875, + "loss": 0.6841, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.00350777106359601, + "rewards/margins": 0.018815908581018448, + "rewards/rejected": -0.02232367917895317, + "step": 3890 + }, + { + "epoch": 2.8097982708933715, + "grad_norm": 3.1397712230682373, + "learning_rate": 6.099248954489794e-10, + "logits/chosen": -1.4105771780014038, + "logits/rejected": -1.4090924263000488, + "logps/chosen": -47.864784240722656, + "logps/rejected": -53.00626754760742, + "loss": 0.6837, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.006424476858228445, + "rewards/margins": 0.019479013979434967, + "rewards/rejected": -0.02590348944067955, + "step": 3900 + }, + { + "epoch": 2.81700288184438, + "grad_norm": 3.630045175552368, + "learning_rate": 5.647650701205653e-10, + "logits/chosen": -1.5031368732452393, + "logits/rejected": -1.4833770990371704, + "logps/chosen": -54.33906173706055, + "logps/rejected": -58.169456481933594, + "loss": 0.6809, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": 0.0006442746380344033, + "rewards/margins": 0.025460779666900635, + "rewards/rejected": -0.02481650747358799, + "step": 3910 + }, + { + "epoch": 2.824207492795389, + "grad_norm": 3.1222622394561768, + "learning_rate": 5.213229409093856e-10, + "logits/chosen": -1.5364577770233154, + "logits/rejected": -1.5258610248565674, + "logps/chosen": -52.6276741027832, + "logps/rejected": -57.63709259033203, + "loss": 0.6819, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.0037466485518962145, + "rewards/margins": 0.023729201406240463, + "rewards/rejected": -0.027475852519273758, + "step": 3920 + }, + { + "epoch": 2.8314121037463975, + "grad_norm": 4.3289971351623535, + "learning_rate": 4.796015616177401e-10, + "logits/chosen": -1.4594485759735107, + "logits/rejected": -1.447249412536621, + "logps/chosen": -51.74785614013672, + "logps/rejected": -55.523780822753906, + "loss": 0.6856, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.006563100032508373, + "rewards/margins": 0.015793252736330032, + "rewards/rejected": -0.02235635183751583, + "step": 3930 + }, + { + "epoch": 2.838616714697406, + "grad_norm": 3.312995672225952, + "learning_rate": 4.3960386508631595e-10, + "logits/chosen": -1.3865981101989746, + "logits/rejected": -1.386038064956665, + "logps/chosen": -42.57002639770508, + "logps/rejected": -46.51558303833008, + "loss": 0.6854, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.005880543030798435, + "rewards/margins": 0.016446929425001144, + "rewards/rejected": -0.022327473387122154, + "step": 3940 + }, + { + "epoch": 2.845821325648415, + "grad_norm": 4.776674747467041, + "learning_rate": 4.013326629880243e-10, + "logits/chosen": -1.4313756227493286, + "logits/rejected": -1.4140355587005615, + "logps/chosen": -50.08747863769531, + "logps/rejected": -53.8878059387207, + "loss": 0.6828, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0068587446585297585, + "rewards/margins": 0.021546222269535065, + "rewards/rejected": -0.02840496599674225, + "step": 3950 + }, + { + "epoch": 2.8530259365994235, + "grad_norm": 3.445603847503662, + "learning_rate": 3.64790645630339e-10, + "logits/chosen": -1.3930222988128662, + "logits/rejected": -1.3879890441894531, + "logps/chosen": -53.27265548706055, + "logps/rejected": -55.5593376159668, + "loss": 0.6875, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.001219206373207271, + "rewards/margins": 0.011788489297032356, + "rewards/rejected": -0.01300769578665495, + "step": 3960 + }, + { + "epoch": 2.860230547550432, + "grad_norm": 4.921634674072266, + "learning_rate": 3.2998038176619e-10, + "logits/chosen": -1.4541983604431152, + "logits/rejected": -1.438096284866333, + "logps/chosen": -51.39448928833008, + "logps/rejected": -54.80561065673828, + "loss": 0.6853, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.006159276235848665, + "rewards/margins": 0.016386728733778, + "rewards/rejected": -0.022546004503965378, + "step": 3970 + }, + { + "epoch": 2.867435158501441, + "grad_norm": 3.477383613586426, + "learning_rate": 2.969043184133907e-10, + "logits/chosen": -1.5591028928756714, + "logits/rejected": -1.5578175783157349, + "logps/chosen": -44.919044494628906, + "logps/rejected": -53.242408752441406, + "loss": 0.6819, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": 0.0005782361840829253, + "rewards/margins": 0.023209361359477043, + "rewards/rejected": -0.022631125524640083, + "step": 3980 + }, + { + "epoch": 2.8746397694524495, + "grad_norm": 3.8745675086975098, + "learning_rate": 2.6556478068261447e-10, + "logits/chosen": -1.450866460800171, + "logits/rejected": -1.4369374513626099, + "logps/chosen": -44.412200927734375, + "logps/rejected": -47.863582611083984, + "loss": 0.6799, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.0014529896434396505, + "rewards/margins": 0.02753259241580963, + "rewards/rejected": -0.026079604402184486, + "step": 3990 + }, + { + "epoch": 2.881844380403458, + "grad_norm": 3.4836292266845703, + "learning_rate": 2.3596397161395607e-10, + "logits/chosen": -1.560675024986267, + "logits/rejected": -1.5387744903564453, + "logps/chosen": -49.524169921875, + "logps/rejected": -54.56645584106445, + "loss": 0.6813, + "rewards/accuracies": 0.6875, + "rewards/chosen": 0.0010647645685821772, + "rewards/margins": 0.024541422724723816, + "rewards/rejected": -0.023476656526327133, + "step": 4000 + }, + { + "epoch": 2.889048991354467, + "grad_norm": 4.788309097290039, + "learning_rate": 2.0810397202206399e-10, + "logits/chosen": -1.4160195589065552, + "logits/rejected": -1.4113706350326538, + "logps/chosen": -49.815757751464844, + "logps/rejected": -53.184669494628906, + "loss": 0.6849, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.001441007712855935, + "rewards/margins": 0.017177799716591835, + "rewards/rejected": -0.015736790373921394, + "step": 4010 + }, + { + "epoch": 2.8962536023054755, + "grad_norm": 3.2461211681365967, + "learning_rate": 1.819867403498737e-10, + "logits/chosen": -1.5659189224243164, + "logits/rejected": -1.5567631721496582, + "logps/chosen": -47.738792419433594, + "logps/rejected": -51.622718811035156, + "loss": 0.6837, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.0069535574875772, + "rewards/margins": 0.019805509597063065, + "rewards/rejected": -0.026759067550301552, + "step": 4020 + }, + { + "epoch": 2.903458213256484, + "grad_norm": 3.430551052093506, + "learning_rate": 1.5761411253092382e-10, + "logits/chosen": -1.431986689567566, + "logits/rejected": -1.410321593284607, + "logps/chosen": -45.951541900634766, + "logps/rejected": -48.03018569946289, + "loss": 0.6846, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.005667565856128931, + "rewards/margins": 0.017614690586924553, + "rewards/rejected": -0.023282255977392197, + "step": 4030 + }, + { + "epoch": 2.910662824207493, + "grad_norm": 3.561012029647827, + "learning_rate": 1.3498780186031455e-10, + "logits/chosen": -1.4953620433807373, + "logits/rejected": -1.4863777160644531, + "logps/chosen": -53.52448272705078, + "logps/rejected": -57.13452911376953, + "loss": 0.6849, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.005448926705867052, + "rewards/margins": 0.017112337052822113, + "rewards/rejected": -0.022561263293027878, + "step": 4040 + }, + { + "epoch": 2.9178674351585014, + "grad_norm": 3.2866272926330566, + "learning_rate": 1.1410939887425141e-10, + "logits/chosen": -1.4994781017303467, + "logits/rejected": -1.4914522171020508, + "logps/chosen": -47.01377487182617, + "logps/rejected": -49.50941848754883, + "loss": 0.6862, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.009291494265198708, + "rewards/margins": 0.01437977235764265, + "rewards/rejected": -0.023671265691518784, + "step": 4050 + }, + { + "epoch": 2.92507204610951, + "grad_norm": 2.916045665740967, + "learning_rate": 9.498037123825686e-11, + "logits/chosen": -1.5113145112991333, + "logits/rejected": -1.5003492832183838, + "logps/chosen": -45.075767517089844, + "logps/rejected": -49.30120086669922, + "loss": 0.6835, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.00341349421069026, + "rewards/margins": 0.019889434799551964, + "rewards/rejected": -0.02330292947590351, + "step": 4060 + }, + { + "epoch": 2.9322766570605188, + "grad_norm": 3.252375841140747, + "learning_rate": 7.760206364398614e-11, + "logits/chosen": -1.5876257419586182, + "logits/rejected": -1.566359281539917, + "logps/chosen": -49.85393524169922, + "logps/rejected": -53.048728942871094, + "loss": 0.684, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.008436409756541252, + "rewards/margins": 0.019341573119163513, + "rewards/rejected": -0.027777981013059616, + "step": 4070 + }, + { + "epoch": 2.9394812680115274, + "grad_norm": 3.834636688232422, + "learning_rate": 6.19756977147029e-11, + "logits/chosen": -1.443345308303833, + "logits/rejected": -1.4344885349273682, + "logps/chosen": -47.13701629638672, + "logps/rejected": -54.29521942138672, + "loss": 0.6833, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.00852961651980877, + "rewards/margins": 0.02065226063132286, + "rewards/rejected": -0.02918187901377678, + "step": 4080 + }, + { + "epoch": 2.946685878962536, + "grad_norm": 2.8011093139648438, + "learning_rate": 4.810237191940625e-11, + "logits/chosen": -1.4434500932693481, + "logits/rejected": -1.4342488050460815, + "logps/chosen": -46.8648567199707, + "logps/rejected": -49.80231857299805, + "loss": 0.6859, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.007858057506382465, + "rewards/margins": 0.01531834714114666, + "rewards/rejected": -0.0231764055788517, + "step": 4090 + }, + { + "epoch": 2.9538904899135447, + "grad_norm": 3.364262580871582, + "learning_rate": 3.5983061495617476e-11, + "logits/chosen": -1.5273181200027466, + "logits/rejected": -1.5262796878814697, + "logps/chosen": -51.708335876464844, + "logps/rejected": -57.30168914794922, + "loss": 0.6843, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0039407601580023766, + "rewards/margins": 0.018400147557258606, + "rewards/rejected": -0.022340910509228706, + "step": 4100 + }, + { + "epoch": 2.9610951008645534, + "grad_norm": 3.0939691066741943, + "learning_rate": 2.5618618380812694e-11, + "logits/chosen": -1.52158784866333, + "logits/rejected": -1.5068227052688599, + "logps/chosen": -42.00890350341797, + "logps/rejected": -47.3350830078125, + "loss": 0.6815, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0033108622301369905, + "rewards/margins": 0.024085834622383118, + "rewards/rejected": -0.02739669755101204, + "step": 4110 + }, + { + "epoch": 2.968299711815562, + "grad_norm": 3.384092092514038, + "learning_rate": 1.700977115254576e-11, + "logits/chosen": -1.4649953842163086, + "logits/rejected": -1.4553617238998413, + "logps/chosen": -46.161407470703125, + "logps/rejected": -51.3560905456543, + "loss": 0.6834, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.00658357422798872, + "rewards/margins": 0.020199140533804893, + "rewards/rejected": -0.02678271196782589, + "step": 4120 + }, + { + "epoch": 2.9755043227665707, + "grad_norm": 2.9335947036743164, + "learning_rate": 1.0157124977230868e-11, + "logits/chosen": -1.4357291460037231, + "logits/rejected": -1.4263992309570312, + "logps/chosen": -43.5537109375, + "logps/rejected": -47.68435287475586, + "loss": 0.6847, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.0015822809655219316, + "rewards/margins": 0.01767374388873577, + "rewards/rejected": -0.019256027415394783, + "step": 4130 + }, + { + "epoch": 2.9827089337175794, + "grad_norm": 3.5399720668792725, + "learning_rate": 5.061161567596061e-12, + "logits/chosen": -1.4696056842803955, + "logits/rejected": -1.4569720029830933, + "logps/chosen": -47.70643997192383, + "logps/rejected": -50.280517578125, + "loss": 0.6846, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.00033920054556801915, + "rewards/margins": 0.017664710059762, + "rewards/rejected": -0.018003910779953003, + "step": 4140 + }, + { + "epoch": 2.989913544668588, + "grad_norm": 3.3628714084625244, + "learning_rate": 1.7222391488297406e-12, + "logits/chosen": -1.5176090002059937, + "logits/rejected": -1.5059229135513306, + "logps/chosen": -53.56816864013672, + "logps/rejected": -58.52666091918945, + "loss": 0.6797, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.0030871466733515263, + "rewards/margins": 0.028029698878526688, + "rewards/rejected": -0.031116846948862076, + "step": 4150 + }, + { + "epoch": 2.9971181556195967, + "grad_norm": 3.9223008155822754, + "learning_rate": 1.4059243338693238e-13, + "logits/chosen": -1.4434741735458374, + "logits/rejected": -1.4326165914535522, + "logps/chosen": -48.58505630493164, + "logps/rejected": -53.30755615234375, + "loss": 0.6829, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0009518606821075082, + "rewards/margins": 0.021236242726445198, + "rewards/rejected": -0.022188100963830948, + "step": 4160 + }, + { + "epoch": 3.0, + "step": 4164, + "total_flos": 0.0, + "train_loss": 0.6881600442010548, + "train_runtime": 6925.9603, + "train_samples_per_second": 9.617, + "train_steps_per_second": 0.601 + } + ], + "logging_steps": 10, + "max_steps": 4164, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}