|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 3.5056792836862627, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -1.4883875846862793, |
|
"logits/rejected": -1.416823148727417, |
|
"logps/chosen": -161.24717712402344, |
|
"logps/rejected": -175.51541137695312, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 3.8174848446929266, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -1.6635231971740723, |
|
"logits/rejected": -1.6545089483261108, |
|
"logps/chosen": -398.12603759765625, |
|
"logps/rejected": -322.4006652832031, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.00020433742611203343, |
|
"rewards/margins": 0.000947743421420455, |
|
"rewards/rejected": -0.000743405893445015, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 3.863373696596945, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -1.6468368768692017, |
|
"logits/rejected": -1.6784213781356812, |
|
"logps/chosen": -268.8175354003906, |
|
"logps/rejected": -237.06240844726562, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0003671760787256062, |
|
"rewards/margins": 0.00017412376473657787, |
|
"rewards/rejected": 0.00019305227033328265, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 3.7081110480933135, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -1.5468581914901733, |
|
"logits/rejected": -1.542797327041626, |
|
"logps/chosen": -266.8534240722656, |
|
"logps/rejected": -267.03790283203125, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.0006165923550724983, |
|
"rewards/margins": 0.0003948546072933823, |
|
"rewards/rejected": 0.00022173782053869218, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 3.5686633612597114, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -1.6888965368270874, |
|
"logits/rejected": -1.651166319847107, |
|
"logps/chosen": -269.12921142578125, |
|
"logps/rejected": -259.73663330078125, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.001302229822613299, |
|
"rewards/margins": 0.001623004674911499, |
|
"rewards/rejected": -0.00032077505602501333, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 3.8364678475966993, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -1.7128832340240479, |
|
"logits/rejected": -1.6672801971435547, |
|
"logps/chosen": -294.93475341796875, |
|
"logps/rejected": -250.9867401123047, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0001523580722277984, |
|
"rewards/margins": -0.000665490108076483, |
|
"rewards/rejected": 0.0008178481948561966, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 3.908172449607729, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -1.5987389087677002, |
|
"logits/rejected": -1.609201192855835, |
|
"logps/chosen": -311.1792907714844, |
|
"logps/rejected": -277.0119323730469, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0004900293424725533, |
|
"rewards/margins": -1.3016722732572816e-05, |
|
"rewards/rejected": 0.0005030458560213447, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 3.4877417634951255, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -1.6401567459106445, |
|
"logits/rejected": -1.6593749523162842, |
|
"logps/chosen": -300.8813171386719, |
|
"logps/rejected": -285.99005126953125, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0007752370438538492, |
|
"rewards/margins": 0.0012944363988935947, |
|
"rewards/rejected": -0.0005191992968320847, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 3.8477961171736634, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -1.5876344442367554, |
|
"logits/rejected": -1.619933843612671, |
|
"logps/chosen": -222.11495971679688, |
|
"logps/rejected": -259.1878967285156, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": 0.0002348280104342848, |
|
"rewards/margins": -0.0016285456949844956, |
|
"rewards/rejected": 0.00186337367631495, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 3.658568799391329, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -1.6128196716308594, |
|
"logits/rejected": -1.612391471862793, |
|
"logps/chosen": -364.6415710449219, |
|
"logps/rejected": -313.7099914550781, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.00037602329393848777, |
|
"rewards/margins": 0.0007518329657614231, |
|
"rewards/rejected": -0.00112785620149225, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 3.8164854638361025, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -1.6457388401031494, |
|
"logits/rejected": -1.6554689407348633, |
|
"logps/chosen": -280.8997802734375, |
|
"logps/rejected": -295.6603698730469, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.001076525659300387, |
|
"rewards/margins": -0.00022420981258619577, |
|
"rewards/rejected": -0.0008523158612661064, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 3.251687359051961, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -1.6135714054107666, |
|
"logits/rejected": -1.6114752292633057, |
|
"logps/chosen": -247.7429962158203, |
|
"logps/rejected": -244.31112670898438, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0007790708914399147, |
|
"rewards/margins": 0.0011699094902724028, |
|
"rewards/rejected": -0.0019489802652969956, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 3.6767857848694434, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -1.643938660621643, |
|
"logits/rejected": -1.6287577152252197, |
|
"logps/chosen": -324.9435729980469, |
|
"logps/rejected": -294.64166259765625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0025485847145318985, |
|
"rewards/margins": -0.0018857631366699934, |
|
"rewards/rejected": -0.0006628216942772269, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 3.378467310139603, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -1.5718883275985718, |
|
"logits/rejected": -1.5992323160171509, |
|
"logps/chosen": -236.16085815429688, |
|
"logps/rejected": -323.8802795410156, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0005537395481951535, |
|
"rewards/margins": 0.0013779096771031618, |
|
"rewards/rejected": -0.0019316490506753325, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 3.5565622632521476, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -1.6049950122833252, |
|
"logits/rejected": -1.670636773109436, |
|
"logps/chosen": -302.82330322265625, |
|
"logps/rejected": -293.5691223144531, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0013237579260021448, |
|
"rewards/margins": 0.0033771514426916838, |
|
"rewards/rejected": -0.004700910300016403, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 3.7241887452710385, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -1.6608047485351562, |
|
"logits/rejected": -1.6172984838485718, |
|
"logps/chosen": -247.96939086914062, |
|
"logps/rejected": -241.17111206054688, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.001827596453949809, |
|
"rewards/margins": 0.0031149538699537516, |
|
"rewards/rejected": -0.004942550323903561, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 3.7650474405725984, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -1.623676061630249, |
|
"logits/rejected": -1.58232843875885, |
|
"logps/chosen": -320.1465759277344, |
|
"logps/rejected": -246.5814971923828, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0039003852289170027, |
|
"rewards/margins": 8.531531420885585e-06, |
|
"rewards/rejected": -0.003908916376531124, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 3.3742190114845125, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -1.6414167881011963, |
|
"logits/rejected": -1.6517263650894165, |
|
"logps/chosen": -365.78070068359375, |
|
"logps/rejected": -355.07684326171875, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.003375888569280505, |
|
"rewards/margins": 0.003419106360524893, |
|
"rewards/rejected": -0.006794995162636042, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 3.893923672357319, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -1.6334331035614014, |
|
"logits/rejected": -1.6182676553726196, |
|
"logps/chosen": -249.95315551757812, |
|
"logps/rejected": -243.65365600585938, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.003182569518685341, |
|
"rewards/margins": 0.005136819090694189, |
|
"rewards/rejected": -0.008319388143718243, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 4.328005991935876, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -1.59438157081604, |
|
"logits/rejected": -1.598433256149292, |
|
"logps/chosen": -328.0663757324219, |
|
"logps/rejected": -265.6521911621094, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.005676161497831345, |
|
"rewards/margins": 0.00224656006321311, |
|
"rewards/rejected": -0.007922721095383167, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 3.4669166404466267, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -1.715904951095581, |
|
"logits/rejected": -1.6798690557479858, |
|
"logps/chosen": -330.5942687988281, |
|
"logps/rejected": -312.1226806640625, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.007675034459680319, |
|
"rewards/margins": 0.004218742251396179, |
|
"rewards/rejected": -0.011893777176737785, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 3.550601001859104, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -1.6995878219604492, |
|
"logits/rejected": -1.7092031240463257, |
|
"logps/chosen": -247.4270477294922, |
|
"logps/rejected": -271.40960693359375, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.006873033009469509, |
|
"rewards/margins": 0.006390347145497799, |
|
"rewards/rejected": -0.013263382017612457, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 3.5506079722939763, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -1.6824872493743896, |
|
"logits/rejected": -1.692633032798767, |
|
"logps/chosen": -331.85870361328125, |
|
"logps/rejected": -327.26947021484375, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.012020569294691086, |
|
"rewards/margins": 0.00552480760961771, |
|
"rewards/rejected": -0.01754537597298622, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 3.7749557114917605, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -1.6799322366714478, |
|
"logits/rejected": -1.7364540100097656, |
|
"logps/chosen": -252.82473754882812, |
|
"logps/rejected": -288.5075378417969, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.011706655845046043, |
|
"rewards/margins": 0.01080317609012127, |
|
"rewards/rejected": -0.022509830072522163, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 4.1857193206793015, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -1.6128301620483398, |
|
"logits/rejected": -1.5980497598648071, |
|
"logps/chosen": -320.16375732421875, |
|
"logps/rejected": -270.80133056640625, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.010916762053966522, |
|
"rewards/margins": 0.015229749493300915, |
|
"rewards/rejected": -0.026146510615944862, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 3.8497859603369804, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -1.6171119213104248, |
|
"logits/rejected": -1.5841248035430908, |
|
"logps/chosen": -254.93936157226562, |
|
"logps/rejected": -243.93179321289062, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.01718009077012539, |
|
"rewards/margins": 0.009010560810565948, |
|
"rewards/rejected": -0.026190653443336487, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 3.705368784982066, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -1.6593215465545654, |
|
"logits/rejected": -1.6591377258300781, |
|
"logps/chosen": -282.84100341796875, |
|
"logps/rejected": -276.47125244140625, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.01674341782927513, |
|
"rewards/margins": 0.015696872025728226, |
|
"rewards/rejected": -0.03244028985500336, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 3.601297170964158, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -1.536478042602539, |
|
"logits/rejected": -1.5124893188476562, |
|
"logps/chosen": -303.76104736328125, |
|
"logps/rejected": -241.261474609375, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.017451269552111626, |
|
"rewards/margins": 0.009415589272975922, |
|
"rewards/rejected": -0.026866856962442398, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 3.9036742193164975, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -1.5547986030578613, |
|
"logits/rejected": -1.5795847177505493, |
|
"logps/chosen": -316.18121337890625, |
|
"logps/rejected": -299.7501220703125, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.02595636248588562, |
|
"rewards/margins": 0.026472270488739014, |
|
"rewards/rejected": -0.052428632974624634, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 3.3624197530231363, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -1.6303586959838867, |
|
"logits/rejected": -1.6242969036102295, |
|
"logps/chosen": -305.33953857421875, |
|
"logps/rejected": -274.9311218261719, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.027737725526094437, |
|
"rewards/margins": 0.02691362239420414, |
|
"rewards/rejected": -0.05465134233236313, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 4.058047991319342, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -1.6332571506500244, |
|
"logits/rejected": -1.6581714153289795, |
|
"logps/chosen": -276.0576171875, |
|
"logps/rejected": -290.2926940917969, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04013335332274437, |
|
"rewards/margins": 0.030186835676431656, |
|
"rewards/rejected": -0.07032018899917603, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 3.4101110193570454, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -1.6190553903579712, |
|
"logits/rejected": -1.6291663646697998, |
|
"logps/chosen": -292.63751220703125, |
|
"logps/rejected": -243.106201171875, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.040502794086933136, |
|
"rewards/margins": 0.03982759267091751, |
|
"rewards/rejected": -0.08033039420843124, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 3.351913067776505, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -1.6056486368179321, |
|
"logits/rejected": -1.5875630378723145, |
|
"logps/chosen": -295.599853515625, |
|
"logps/rejected": -259.6369934082031, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0446627140045166, |
|
"rewards/margins": 0.02809850312769413, |
|
"rewards/rejected": -0.07276121526956558, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 3.4017416105630573, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -1.5247066020965576, |
|
"logits/rejected": -1.5482518672943115, |
|
"logps/chosen": -268.7400207519531, |
|
"logps/rejected": -309.99786376953125, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.0597003698348999, |
|
"rewards/margins": 0.05082261562347412, |
|
"rewards/rejected": -0.11052300035953522, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 3.3527499070225244, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -1.6196010112762451, |
|
"logits/rejected": -1.595496416091919, |
|
"logps/chosen": -257.72515869140625, |
|
"logps/rejected": -282.81781005859375, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.05155477672815323, |
|
"rewards/margins": 0.06573888659477234, |
|
"rewards/rejected": -0.11729365587234497, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 3.8502979336068717, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -1.6366294622421265, |
|
"logits/rejected": -1.6474313735961914, |
|
"logps/chosen": -299.79010009765625, |
|
"logps/rejected": -299.626220703125, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09185166656970978, |
|
"rewards/margins": 0.03506358712911606, |
|
"rewards/rejected": -0.12691523134708405, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 4.087127125149584, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -1.6223100423812866, |
|
"logits/rejected": -1.5971088409423828, |
|
"logps/chosen": -292.7254333496094, |
|
"logps/rejected": -240.09213256835938, |
|
"loss": 0.6693, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10348290205001831, |
|
"rewards/margins": 0.053151585161685944, |
|
"rewards/rejected": -0.15663447976112366, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 3.962698259273254, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -1.5964925289154053, |
|
"logits/rejected": -1.617010474205017, |
|
"logps/chosen": -265.0798034667969, |
|
"logps/rejected": -278.52117919921875, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1335074007511139, |
|
"rewards/margins": 0.0590825080871582, |
|
"rewards/rejected": -0.1925898939371109, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 3.5033230698831668, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -1.5588051080703735, |
|
"logits/rejected": -1.5864968299865723, |
|
"logps/chosen": -267.39471435546875, |
|
"logps/rejected": -266.3304443359375, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09075454622507095, |
|
"rewards/margins": 0.07683941721916199, |
|
"rewards/rejected": -0.16759395599365234, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 3.9053311567068993, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -1.5949430465698242, |
|
"logits/rejected": -1.5674622058868408, |
|
"logps/chosen": -301.2566833496094, |
|
"logps/rejected": -272.92877197265625, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.16490274667739868, |
|
"rewards/margins": 0.055955369025468826, |
|
"rewards/rejected": -0.2208581268787384, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 3.9140466546998827, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -1.6508136987686157, |
|
"logits/rejected": -1.6695753335952759, |
|
"logps/chosen": -300.67315673828125, |
|
"logps/rejected": -291.95501708984375, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.21914562582969666, |
|
"rewards/margins": 0.147565096616745, |
|
"rewards/rejected": -0.36671072244644165, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 4.42678932238528, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -1.5839338302612305, |
|
"logits/rejected": -1.5947954654693604, |
|
"logps/chosen": -296.02362060546875, |
|
"logps/rejected": -332.89208984375, |
|
"loss": 0.6292, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3386813700199127, |
|
"rewards/margins": 0.144867405295372, |
|
"rewards/rejected": -0.4835488200187683, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 5.2789138321700495, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -1.5680617094039917, |
|
"logits/rejected": -1.5661697387695312, |
|
"logps/chosen": -414.2063903808594, |
|
"logps/rejected": -393.57598876953125, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5759106874465942, |
|
"rewards/margins": 0.245382621884346, |
|
"rewards/rejected": -0.8212932348251343, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 4.983795366931148, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -1.634783387184143, |
|
"logits/rejected": -1.6127817630767822, |
|
"logps/chosen": -324.0008850097656, |
|
"logps/rejected": -326.49810791015625, |
|
"loss": 0.6439, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.47741881012916565, |
|
"rewards/margins": 0.19091394543647766, |
|
"rewards/rejected": -0.6683326959609985, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 3.631330415762284, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -1.6043802499771118, |
|
"logits/rejected": -1.597813367843628, |
|
"logps/chosen": -332.45391845703125, |
|
"logps/rejected": -329.4422912597656, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5105425119400024, |
|
"rewards/margins": 0.08223161846399307, |
|
"rewards/rejected": -0.5927742123603821, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 4.090138150318208, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -1.506775140762329, |
|
"logits/rejected": -1.4702590703964233, |
|
"logps/chosen": -296.05792236328125, |
|
"logps/rejected": -289.3249816894531, |
|
"loss": 0.6316, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.3827100694179535, |
|
"rewards/margins": 0.15496531128883362, |
|
"rewards/rejected": -0.5376753807067871, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 4.139361867296913, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -1.569830298423767, |
|
"logits/rejected": -1.5698078870773315, |
|
"logps/chosen": -325.01458740234375, |
|
"logps/rejected": -337.7842712402344, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2572017014026642, |
|
"rewards/margins": 0.24531395733356476, |
|
"rewards/rejected": -0.5025156140327454, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 4.5407974858594455, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -1.53157639503479, |
|
"logits/rejected": -1.498214840888977, |
|
"logps/chosen": -273.22802734375, |
|
"logps/rejected": -292.3331298828125, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3938341736793518, |
|
"rewards/margins": 0.17262205481529236, |
|
"rewards/rejected": -0.5664561986923218, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 4.783089704109638, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -1.5579659938812256, |
|
"logits/rejected": -1.5760862827301025, |
|
"logps/chosen": -306.2027282714844, |
|
"logps/rejected": -380.0113525390625, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.42028918862342834, |
|
"rewards/margins": 0.2578314542770386, |
|
"rewards/rejected": -0.6781206727027893, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 4.8302096742139105, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -1.5267994403839111, |
|
"logits/rejected": -1.5366883277893066, |
|
"logps/chosen": -285.08990478515625, |
|
"logps/rejected": -315.9127197265625, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.34838151931762695, |
|
"rewards/margins": 0.4001844525337219, |
|
"rewards/rejected": -0.7485659122467041, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 4.655267788061174, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -1.5378767251968384, |
|
"logits/rejected": -1.5352329015731812, |
|
"logps/chosen": -361.957275390625, |
|
"logps/rejected": -334.80194091796875, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6517360806465149, |
|
"rewards/margins": 0.10645874589681625, |
|
"rewards/rejected": -0.7581947445869446, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 4.03890292179432, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -1.5456221103668213, |
|
"logits/rejected": -1.5595461130142212, |
|
"logps/chosen": -313.9156188964844, |
|
"logps/rejected": -357.70074462890625, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3863525092601776, |
|
"rewards/margins": 0.5194737911224365, |
|
"rewards/rejected": -0.9058262705802917, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 4.515523592913458, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -1.5077584981918335, |
|
"logits/rejected": -1.508772611618042, |
|
"logps/chosen": -363.2955322265625, |
|
"logps/rejected": -380.5810852050781, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5841118097305298, |
|
"rewards/margins": 0.2735467553138733, |
|
"rewards/rejected": -0.8576586842536926, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 4.68328497340501, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -1.4954583644866943, |
|
"logits/rejected": -1.5232939720153809, |
|
"logps/chosen": -274.912353515625, |
|
"logps/rejected": -337.65228271484375, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3999376893043518, |
|
"rewards/margins": 0.3088452219963074, |
|
"rewards/rejected": -0.7087828516960144, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 5.719715088463468, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -1.5417931079864502, |
|
"logits/rejected": -1.5638864040374756, |
|
"logps/chosen": -317.2779846191406, |
|
"logps/rejected": -345.56207275390625, |
|
"loss": 0.6037, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.46498027443885803, |
|
"rewards/margins": 0.4305883049964905, |
|
"rewards/rejected": -0.8955684900283813, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 3.924618462296512, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -1.4803025722503662, |
|
"logits/rejected": -1.5034992694854736, |
|
"logps/chosen": -383.60009765625, |
|
"logps/rejected": -410.09637451171875, |
|
"loss": 0.6196, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.622631847858429, |
|
"rewards/margins": 0.29748308658599854, |
|
"rewards/rejected": -0.9201149940490723, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 5.000411140737786, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -1.5640392303466797, |
|
"logits/rejected": -1.5526823997497559, |
|
"logps/chosen": -262.2401123046875, |
|
"logps/rejected": -251.67489624023438, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.4863004684448242, |
|
"rewards/margins": 0.13453409075737, |
|
"rewards/rejected": -0.6208345890045166, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 5.426495125083168, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -1.545018196105957, |
|
"logits/rejected": -1.5341848134994507, |
|
"logps/chosen": -359.99212646484375, |
|
"logps/rejected": -430.8915100097656, |
|
"loss": 0.61, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6372441053390503, |
|
"rewards/margins": 0.4039608836174011, |
|
"rewards/rejected": -1.0412050485610962, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 4.977101807860706, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -1.6066020727157593, |
|
"logits/rejected": -1.5763802528381348, |
|
"logps/chosen": -331.3481750488281, |
|
"logps/rejected": -358.0259704589844, |
|
"loss": 0.5891, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5530639886856079, |
|
"rewards/margins": 0.3901999890804291, |
|
"rewards/rejected": -0.9432638883590698, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 6.414020548691111, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -1.5697975158691406, |
|
"logits/rejected": -1.545709252357483, |
|
"logps/chosen": -320.78094482421875, |
|
"logps/rejected": -346.2607116699219, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8397436141967773, |
|
"rewards/margins": 0.30745047330856323, |
|
"rewards/rejected": -1.1471941471099854, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 5.342440443936391, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -1.4645593166351318, |
|
"logits/rejected": -1.4802438020706177, |
|
"logps/chosen": -373.068359375, |
|
"logps/rejected": -366.2574157714844, |
|
"loss": 0.6067, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.551115870475769, |
|
"rewards/margins": 0.47067826986312866, |
|
"rewards/rejected": -1.021794080734253, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 5.144967895606553, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -1.6260372400283813, |
|
"logits/rejected": -1.6305965185165405, |
|
"logps/chosen": -339.40875244140625, |
|
"logps/rejected": -351.1748046875, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5743804574012756, |
|
"rewards/margins": 0.37975504994392395, |
|
"rewards/rejected": -0.954135537147522, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 5.888969372392584, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -1.5199607610702515, |
|
"logits/rejected": -1.4783246517181396, |
|
"logps/chosen": -454.8645935058594, |
|
"logps/rejected": -392.8035888671875, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8411262631416321, |
|
"rewards/margins": 0.4142216742038727, |
|
"rewards/rejected": -1.2553479671478271, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 5.241290267394913, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -1.5408384799957275, |
|
"logits/rejected": -1.5691133737564087, |
|
"logps/chosen": -291.14508056640625, |
|
"logps/rejected": -330.90277099609375, |
|
"loss": 0.5691, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5521666407585144, |
|
"rewards/margins": 0.4039185047149658, |
|
"rewards/rejected": -0.956085205078125, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 6.034628988888004, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -1.5112297534942627, |
|
"logits/rejected": -1.5096272230148315, |
|
"logps/chosen": -317.29400634765625, |
|
"logps/rejected": -367.8897705078125, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6753019690513611, |
|
"rewards/margins": 0.42325735092163086, |
|
"rewards/rejected": -1.0985593795776367, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 6.06042461193669, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -1.462241530418396, |
|
"logits/rejected": -1.4188714027404785, |
|
"logps/chosen": -373.4043884277344, |
|
"logps/rejected": -387.77923583984375, |
|
"loss": 0.6217, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8461149334907532, |
|
"rewards/margins": 0.27142664790153503, |
|
"rewards/rejected": -1.1175415515899658, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 4.638041821009946, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -1.532447338104248, |
|
"logits/rejected": -1.5247899293899536, |
|
"logps/chosen": -303.63494873046875, |
|
"logps/rejected": -343.4326171875, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5687133073806763, |
|
"rewards/margins": 0.5383566617965698, |
|
"rewards/rejected": -1.107069969177246, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 4.7563938394985525, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": -1.5980417728424072, |
|
"logits/rejected": -1.5698829889297485, |
|
"logps/chosen": -385.9518737792969, |
|
"logps/rejected": -372.51214599609375, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5248526930809021, |
|
"rewards/margins": 0.12505970895290375, |
|
"rewards/rejected": -0.649912416934967, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 4.759002323226384, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -1.5244510173797607, |
|
"logits/rejected": -1.5224545001983643, |
|
"logps/chosen": -313.8047180175781, |
|
"logps/rejected": -315.26263427734375, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4212771952152252, |
|
"rewards/margins": 0.25005480647087097, |
|
"rewards/rejected": -0.671332061290741, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 5.223029601556046, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -1.479612946510315, |
|
"logits/rejected": -1.4728684425354004, |
|
"logps/chosen": -311.44775390625, |
|
"logps/rejected": -350.54779052734375, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.564383864402771, |
|
"rewards/margins": 0.3139537572860718, |
|
"rewards/rejected": -0.8783376812934875, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 5.40070237050068, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -1.4617944955825806, |
|
"logits/rejected": -1.4599894285202026, |
|
"logps/chosen": -294.10797119140625, |
|
"logps/rejected": -335.54705810546875, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6340052485466003, |
|
"rewards/margins": 0.6616395711898804, |
|
"rewards/rejected": -1.2956448793411255, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 6.012195841421261, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": -1.4645987749099731, |
|
"logits/rejected": -1.453375220298767, |
|
"logps/chosen": -395.05718994140625, |
|
"logps/rejected": -391.1450500488281, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7132852077484131, |
|
"rewards/margins": 0.5267443060874939, |
|
"rewards/rejected": -1.2400295734405518, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 5.2059132091980596, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -1.5603594779968262, |
|
"logits/rejected": -1.5616681575775146, |
|
"logps/chosen": -337.87713623046875, |
|
"logps/rejected": -388.90130615234375, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7377550005912781, |
|
"rewards/margins": 0.5399189591407776, |
|
"rewards/rejected": -1.2776739597320557, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 11.822921470671519, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -1.5943918228149414, |
|
"logits/rejected": -1.606041669845581, |
|
"logps/chosen": -346.92205810546875, |
|
"logps/rejected": -378.21240234375, |
|
"loss": 0.6279, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8934835195541382, |
|
"rewards/margins": 0.5335273146629333, |
|
"rewards/rejected": -1.4270107746124268, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 5.211631403180053, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -1.5509735345840454, |
|
"logits/rejected": -1.4975712299346924, |
|
"logps/chosen": -349.6174011230469, |
|
"logps/rejected": -349.61492919921875, |
|
"loss": 0.5735, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6521992087364197, |
|
"rewards/margins": 0.30101969838142395, |
|
"rewards/rejected": -0.9532188177108765, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 5.432660278163866, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -1.5139765739440918, |
|
"logits/rejected": -1.5181429386138916, |
|
"logps/chosen": -341.566162109375, |
|
"logps/rejected": -375.05828857421875, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6539328098297119, |
|
"rewards/margins": 0.38768962025642395, |
|
"rewards/rejected": -1.0416224002838135, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 5.539798409070412, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -1.5405504703521729, |
|
"logits/rejected": -1.4953763484954834, |
|
"logps/chosen": -427.6753845214844, |
|
"logps/rejected": -441.68365478515625, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8601313829421997, |
|
"rewards/margins": 0.2185048609972, |
|
"rewards/rejected": -1.0786362886428833, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 6.131059558759629, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -1.4995934963226318, |
|
"logits/rejected": -1.4943822622299194, |
|
"logps/chosen": -385.32073974609375, |
|
"logps/rejected": -454.878173828125, |
|
"loss": 0.5706, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6404799222946167, |
|
"rewards/margins": 0.7536662817001343, |
|
"rewards/rejected": -1.394146203994751, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 7.531135867694059, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -1.5458956956863403, |
|
"logits/rejected": -1.5822408199310303, |
|
"logps/chosen": -344.25244140625, |
|
"logps/rejected": -396.47247314453125, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8405560255050659, |
|
"rewards/margins": 0.45578351616859436, |
|
"rewards/rejected": -1.296339511871338, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 6.102690327063579, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -1.5642445087432861, |
|
"logits/rejected": -1.5695910453796387, |
|
"logps/chosen": -318.0394592285156, |
|
"logps/rejected": -365.24224853515625, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8526245951652527, |
|
"rewards/margins": 0.4370867609977722, |
|
"rewards/rejected": -1.2897112369537354, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 6.832280804361678, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -1.4795372486114502, |
|
"logits/rejected": -1.4752862453460693, |
|
"logps/chosen": -411.67425537109375, |
|
"logps/rejected": -499.11505126953125, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0738346576690674, |
|
"rewards/margins": 1.004138708114624, |
|
"rewards/rejected": -2.0779738426208496, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 8.284630220336107, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -1.593660593032837, |
|
"logits/rejected": -1.5664924383163452, |
|
"logps/chosen": -356.3196105957031, |
|
"logps/rejected": -367.1153564453125, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9886842966079712, |
|
"rewards/margins": 0.3350405991077423, |
|
"rewards/rejected": -1.3237249851226807, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 5.96781876097138, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -1.6477540731430054, |
|
"logits/rejected": -1.6294025182724, |
|
"logps/chosen": -371.42059326171875, |
|
"logps/rejected": -375.2044982910156, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.831330418586731, |
|
"rewards/margins": 0.20967264473438263, |
|
"rewards/rejected": -1.0410031080245972, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 5.266959588580237, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -1.5675857067108154, |
|
"logits/rejected": -1.504521131515503, |
|
"logps/chosen": -352.08807373046875, |
|
"logps/rejected": -370.45294189453125, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.783744215965271, |
|
"rewards/margins": 0.4445928931236267, |
|
"rewards/rejected": -1.2283371686935425, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 7.5633333882780365, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -1.6219160556793213, |
|
"logits/rejected": -1.5205755233764648, |
|
"logps/chosen": -343.59161376953125, |
|
"logps/rejected": -349.74249267578125, |
|
"loss": 0.5914, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7424141764640808, |
|
"rewards/margins": 0.3212757408618927, |
|
"rewards/rejected": -1.0636898279190063, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 5.7426817744180045, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": -1.511488914489746, |
|
"logits/rejected": -1.5107842683792114, |
|
"logps/chosen": -362.03057861328125, |
|
"logps/rejected": -407.59161376953125, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5177355408668518, |
|
"rewards/margins": 0.7411140203475952, |
|
"rewards/rejected": -1.2588495016098022, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 6.495429597968071, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": -1.4964102506637573, |
|
"logits/rejected": -1.5037615299224854, |
|
"logps/chosen": -321.86236572265625, |
|
"logps/rejected": -395.5457763671875, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7702068090438843, |
|
"rewards/margins": 0.5501433610916138, |
|
"rewards/rejected": -1.3203500509262085, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 5.5808817971322116, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": -1.488948941230774, |
|
"logits/rejected": -1.5117526054382324, |
|
"logps/chosen": -384.2512512207031, |
|
"logps/rejected": -485.3260803222656, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9392452239990234, |
|
"rewards/margins": 1.2166972160339355, |
|
"rewards/rejected": -2.155942440032959, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 7.0272013657172625, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -1.520959734916687, |
|
"logits/rejected": -1.5359563827514648, |
|
"logps/chosen": -357.6517333984375, |
|
"logps/rejected": -377.56085205078125, |
|
"loss": 0.5854, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8206228017807007, |
|
"rewards/margins": 0.3496705889701843, |
|
"rewards/rejected": -1.1702934503555298, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 6.3594913416138885, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": -1.466625452041626, |
|
"logits/rejected": -1.4915995597839355, |
|
"logps/chosen": -374.0470886230469, |
|
"logps/rejected": -476.73828125, |
|
"loss": 0.6061, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0663881301879883, |
|
"rewards/margins": 1.0952132940292358, |
|
"rewards/rejected": -2.1616015434265137, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 6.116338715106648, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": -1.5042366981506348, |
|
"logits/rejected": -1.5534061193466187, |
|
"logps/chosen": -364.03472900390625, |
|
"logps/rejected": -392.703857421875, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9239017367362976, |
|
"rewards/margins": 0.20848917961120605, |
|
"rewards/rejected": -1.1323908567428589, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 34.61975290546189, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -1.5566984415054321, |
|
"logits/rejected": -1.5171282291412354, |
|
"logps/chosen": -326.6298828125, |
|
"logps/rejected": -439.526123046875, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8856453895568848, |
|
"rewards/margins": 1.1186928749084473, |
|
"rewards/rejected": -2.004338264465332, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 5.399099459014794, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": -1.4662867784500122, |
|
"logits/rejected": -1.3914134502410889, |
|
"logps/chosen": -505.794921875, |
|
"logps/rejected": -595.918701171875, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.901526689529419, |
|
"rewards/margins": 0.8914464116096497, |
|
"rewards/rejected": -2.792973041534424, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 5.819110199820003, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": -1.547885537147522, |
|
"logits/rejected": -1.5537118911743164, |
|
"logps/chosen": -381.8018493652344, |
|
"logps/rejected": -429.70758056640625, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.960909366607666, |
|
"rewards/margins": 0.4732838273048401, |
|
"rewards/rejected": -1.4341931343078613, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 7.075329262381567, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": -1.4385401010513306, |
|
"logits/rejected": -1.4047901630401611, |
|
"logps/chosen": -439.00054931640625, |
|
"logps/rejected": -563.8079833984375, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4129924774169922, |
|
"rewards/margins": 1.386842966079712, |
|
"rewards/rejected": -2.799834966659546, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 117.88806086127256, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": -1.4240310192108154, |
|
"logits/rejected": -1.3947417736053467, |
|
"logps/chosen": -479.65008544921875, |
|
"logps/rejected": -550.0196533203125, |
|
"loss": 0.6165, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7085025310516357, |
|
"rewards/margins": 1.4387762546539307, |
|
"rewards/rejected": -3.1472787857055664, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 7.753373606542773, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": -1.4834281206130981, |
|
"logits/rejected": -1.4571282863616943, |
|
"logps/chosen": -435.71551513671875, |
|
"logps/rejected": -423.3853454589844, |
|
"loss": 0.5621, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0432417392730713, |
|
"rewards/margins": 0.4899858832359314, |
|
"rewards/rejected": -1.533227562904358, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 35.84391506181939, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": -1.540824055671692, |
|
"logits/rejected": -1.5162460803985596, |
|
"logps/chosen": -473.71514892578125, |
|
"logps/rejected": -511.4437561035156, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2902333736419678, |
|
"rewards/margins": 0.7524299025535583, |
|
"rewards/rejected": -2.042663097381592, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 6.711770568150169, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": -1.5611612796783447, |
|
"logits/rejected": -1.5459405183792114, |
|
"logps/chosen": -390.70880126953125, |
|
"logps/rejected": -402.97979736328125, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9223060607910156, |
|
"rewards/margins": 0.2812274992465973, |
|
"rewards/rejected": -1.20353364944458, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 7.356819549245005, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": -1.5465433597564697, |
|
"logits/rejected": -1.4800186157226562, |
|
"logps/chosen": -394.327880859375, |
|
"logps/rejected": -407.1432189941406, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.086284875869751, |
|
"rewards/margins": 0.5457164645195007, |
|
"rewards/rejected": -1.632001519203186, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 6.180011768389308, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": -1.4510798454284668, |
|
"logits/rejected": -1.4394136667251587, |
|
"logps/chosen": -414.8174743652344, |
|
"logps/rejected": -487.3868713378906, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.4004895687103271, |
|
"rewards/margins": 0.7286332845687866, |
|
"rewards/rejected": -2.1291232109069824, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 8.973107903148879, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": -1.4409250020980835, |
|
"logits/rejected": -1.4732897281646729, |
|
"logps/chosen": -402.0752258300781, |
|
"logps/rejected": -518.8154907226562, |
|
"loss": 0.5541, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2081267833709717, |
|
"rewards/margins": 0.7229002714157104, |
|
"rewards/rejected": -1.9310270547866821, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 6.834796795848274, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": -1.385780930519104, |
|
"logits/rejected": -1.3392270803451538, |
|
"logps/chosen": -520.9588012695312, |
|
"logps/rejected": -768.1580200195312, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.352682590484619, |
|
"rewards/margins": 2.0763027667999268, |
|
"rewards/rejected": -4.428984642028809, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 6.338350339612117, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": -1.4341570138931274, |
|
"logits/rejected": -1.3487894535064697, |
|
"logps/chosen": -448.89727783203125, |
|
"logps/rejected": -498.39654541015625, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.3556662797927856, |
|
"rewards/margins": 1.197887897491455, |
|
"rewards/rejected": -2.553554058074951, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 9.762031523337113, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": -1.5152060985565186, |
|
"logits/rejected": -1.4758002758026123, |
|
"logps/chosen": -388.54815673828125, |
|
"logps/rejected": -449.3814392089844, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1459699869155884, |
|
"rewards/margins": 0.7172293663024902, |
|
"rewards/rejected": -1.863199234008789, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 6.587552980986566, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": -1.5234332084655762, |
|
"logits/rejected": -1.397935152053833, |
|
"logps/chosen": -427.96697998046875, |
|
"logps/rejected": -441.1510314941406, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.4824414253234863, |
|
"rewards/margins": 0.7462078332901001, |
|
"rewards/rejected": -2.228649139404297, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 7.250799528397824, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": -1.513885259628296, |
|
"logits/rejected": -1.4797332286834717, |
|
"logps/chosen": -404.1684265136719, |
|
"logps/rejected": -435.52703857421875, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1321500539779663, |
|
"rewards/margins": 0.6284357905387878, |
|
"rewards/rejected": -1.7605857849121094, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 6.968262543987022, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": -1.4521477222442627, |
|
"logits/rejected": -1.448061466217041, |
|
"logps/chosen": -391.4552917480469, |
|
"logps/rejected": -503.6328125, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.092247486114502, |
|
"rewards/margins": 0.9845672845840454, |
|
"rewards/rejected": -2.076814889907837, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 6.982005120781178, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": -1.3785518407821655, |
|
"logits/rejected": -1.3828446865081787, |
|
"logps/chosen": -446.35888671875, |
|
"logps/rejected": -595.894775390625, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5676217079162598, |
|
"rewards/margins": 1.0973035097122192, |
|
"rewards/rejected": -2.6649250984191895, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 10.40340346607264, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": -1.5188038349151611, |
|
"logits/rejected": -1.5022757053375244, |
|
"logps/chosen": -400.90130615234375, |
|
"logps/rejected": -553.9241943359375, |
|
"loss": 0.5458, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2826495170593262, |
|
"rewards/margins": 1.2281033992767334, |
|
"rewards/rejected": -2.5107529163360596, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 6.972298855628279, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": -1.5422290563583374, |
|
"logits/rejected": -1.467437505722046, |
|
"logps/chosen": -410.7484436035156, |
|
"logps/rejected": -512.1973876953125, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1985065937042236, |
|
"rewards/margins": 1.2838784456253052, |
|
"rewards/rejected": -2.4823849201202393, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 8.549949049117258, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": -1.5908862352371216, |
|
"logits/rejected": -1.5656800270080566, |
|
"logps/chosen": -441.91448974609375, |
|
"logps/rejected": -512.890869140625, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3182880878448486, |
|
"rewards/margins": 1.022831678390503, |
|
"rewards/rejected": -2.3411195278167725, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 7.404227636225907, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": -1.3316807746887207, |
|
"logits/rejected": -1.3402780294418335, |
|
"logps/chosen": -334.657958984375, |
|
"logps/rejected": -431.4007873535156, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3125879764556885, |
|
"rewards/margins": 0.8587444424629211, |
|
"rewards/rejected": -2.1713321208953857, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 9.086124099951459, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": -1.373974323272705, |
|
"logits/rejected": -1.3391624689102173, |
|
"logps/chosen": -451.37176513671875, |
|
"logps/rejected": -618.1118774414062, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3380093574523926, |
|
"rewards/margins": 1.4104362726211548, |
|
"rewards/rejected": -3.748445987701416, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 8.245489550138963, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": -1.3963029384613037, |
|
"logits/rejected": -1.3131446838378906, |
|
"logps/chosen": -397.52484130859375, |
|
"logps/rejected": -545.176025390625, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6280912160873413, |
|
"rewards/margins": 1.4453803300857544, |
|
"rewards/rejected": -3.0734715461730957, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 7.262761099946982, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": -1.4503087997436523, |
|
"logits/rejected": -1.3999178409576416, |
|
"logps/chosen": -486.7837829589844, |
|
"logps/rejected": -546.538818359375, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.865114450454712, |
|
"rewards/margins": 0.9383285641670227, |
|
"rewards/rejected": -2.803443193435669, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 10.467347582561494, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": -1.413732886314392, |
|
"logits/rejected": -1.3654654026031494, |
|
"logps/chosen": -368.93231201171875, |
|
"logps/rejected": -577.9190673828125, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6492884159088135, |
|
"rewards/margins": 1.7214257717132568, |
|
"rewards/rejected": -3.370713710784912, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 7.656835918634615, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": -1.4396826028823853, |
|
"logits/rejected": -1.4221911430358887, |
|
"logps/chosen": -438.3218688964844, |
|
"logps/rejected": -529.28759765625, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.745650291442871, |
|
"rewards/margins": 1.0542502403259277, |
|
"rewards/rejected": -2.799900531768799, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 10.339237287269262, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": -1.3462697267532349, |
|
"logits/rejected": -1.3036195039749146, |
|
"logps/chosen": -428.9185485839844, |
|
"logps/rejected": -522.67236328125, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5868905782699585, |
|
"rewards/margins": 1.3584994077682495, |
|
"rewards/rejected": -2.945390224456787, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 10.424964890520823, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": -1.4587008953094482, |
|
"logits/rejected": -1.3789886236190796, |
|
"logps/chosen": -418.345458984375, |
|
"logps/rejected": -561.0228271484375, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4498519897460938, |
|
"rewards/margins": 1.7318111658096313, |
|
"rewards/rejected": -3.1816630363464355, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 9.033374903725427, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": -1.4350335597991943, |
|
"logits/rejected": -1.4181472063064575, |
|
"logps/chosen": -448.14141845703125, |
|
"logps/rejected": -524.2359619140625, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7904564142227173, |
|
"rewards/margins": 1.0193628072738647, |
|
"rewards/rejected": -2.809818983078003, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 12.051469646958232, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": -1.5547723770141602, |
|
"logits/rejected": -1.5221188068389893, |
|
"logps/chosen": -415.83349609375, |
|
"logps/rejected": -476.7373962402344, |
|
"loss": 0.5325, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3881736993789673, |
|
"rewards/margins": 0.9286211133003235, |
|
"rewards/rejected": -2.3167946338653564, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 8.844148732841079, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": -1.574505090713501, |
|
"logits/rejected": -1.5706756114959717, |
|
"logps/chosen": -370.81610107421875, |
|
"logps/rejected": -481.24993896484375, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1100866794586182, |
|
"rewards/margins": 0.9274276494979858, |
|
"rewards/rejected": -2.0375144481658936, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 9.594644147905797, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": -1.4629461765289307, |
|
"logits/rejected": -1.4520565271377563, |
|
"logps/chosen": -367.5827331542969, |
|
"logps/rejected": -431.8439025878906, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2434320449829102, |
|
"rewards/margins": 0.7836133241653442, |
|
"rewards/rejected": -2.0270450115203857, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 6.780108062586533, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": -1.5323419570922852, |
|
"logits/rejected": -1.5191363096237183, |
|
"logps/chosen": -375.8507995605469, |
|
"logps/rejected": -497.87921142578125, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.157468557357788, |
|
"rewards/margins": 1.0888302326202393, |
|
"rewards/rejected": -2.2462985515594482, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 7.668018750014516, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": -1.468441128730774, |
|
"logits/rejected": -1.3979756832122803, |
|
"logps/chosen": -467.69976806640625, |
|
"logps/rejected": -525.319091796875, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5909717082977295, |
|
"rewards/margins": 0.947952926158905, |
|
"rewards/rejected": -2.5389246940612793, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 8.293797053598022, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": -1.4755961894989014, |
|
"logits/rejected": -1.4515053033828735, |
|
"logps/chosen": -433.07147216796875, |
|
"logps/rejected": -502.43963623046875, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.423656702041626, |
|
"rewards/margins": 0.8972675204277039, |
|
"rewards/rejected": -2.3209242820739746, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 11.368693082147297, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": -1.4548556804656982, |
|
"logits/rejected": -1.4107099771499634, |
|
"logps/chosen": -393.0867614746094, |
|
"logps/rejected": -568.4356689453125, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3003679513931274, |
|
"rewards/margins": 1.8714444637298584, |
|
"rewards/rejected": -3.1718125343322754, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 8.372672642590855, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": -1.449552297592163, |
|
"logits/rejected": -1.375659704208374, |
|
"logps/chosen": -478.12615966796875, |
|
"logps/rejected": -589.36376953125, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0235326290130615, |
|
"rewards/margins": 1.4111472368240356, |
|
"rewards/rejected": -3.4346795082092285, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 10.643231523467081, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": -1.5441666841506958, |
|
"logits/rejected": -1.4388468265533447, |
|
"logps/chosen": -532.8125, |
|
"logps/rejected": -604.3128662109375, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.138451337814331, |
|
"rewards/margins": 1.39248788356781, |
|
"rewards/rejected": -3.5309395790100098, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 8.214989448750508, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": -1.5427402257919312, |
|
"logits/rejected": -1.4345190525054932, |
|
"logps/chosen": -433.6434631347656, |
|
"logps/rejected": -459.6392517089844, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6685816049575806, |
|
"rewards/margins": 0.8387784957885742, |
|
"rewards/rejected": -2.5073604583740234, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 7.657845271019461, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": -1.3856598138809204, |
|
"logits/rejected": -1.3218709230422974, |
|
"logps/chosen": -462.48175048828125, |
|
"logps/rejected": -564.1286010742188, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8233705759048462, |
|
"rewards/margins": 1.4314063787460327, |
|
"rewards/rejected": -3.2547767162323, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 12.669727176995098, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": -1.5354691743850708, |
|
"logits/rejected": -1.452030062675476, |
|
"logps/chosen": -461.422607421875, |
|
"logps/rejected": -695.4813842773438, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5948652029037476, |
|
"rewards/margins": 2.2517476081848145, |
|
"rewards/rejected": -3.8466124534606934, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 7.428351039897863, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": -1.5226895809173584, |
|
"logits/rejected": -1.4397214651107788, |
|
"logps/chosen": -448.7567443847656, |
|
"logps/rejected": -654.158447265625, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4266167879104614, |
|
"rewards/margins": 2.2771382331848145, |
|
"rewards/rejected": -3.7037551403045654, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 8.35394394188628, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": -1.5027117729187012, |
|
"logits/rejected": -1.4603252410888672, |
|
"logps/chosen": -464.45770263671875, |
|
"logps/rejected": -600.5919189453125, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7185165882110596, |
|
"rewards/margins": 1.4774185419082642, |
|
"rewards/rejected": -3.195935010910034, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 8.547450000172507, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": -1.6239745616912842, |
|
"logits/rejected": -1.5768134593963623, |
|
"logps/chosen": -385.5645751953125, |
|
"logps/rejected": -461.92413330078125, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4032585620880127, |
|
"rewards/margins": 0.9687395095825195, |
|
"rewards/rejected": -2.3719983100891113, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 10.46357929901763, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": -1.5189791917800903, |
|
"logits/rejected": -1.4443309307098389, |
|
"logps/chosen": -452.8273010253906, |
|
"logps/rejected": -588.3678588867188, |
|
"loss": 0.5361, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9200900793075562, |
|
"rewards/margins": 1.5941896438598633, |
|
"rewards/rejected": -3.51427960395813, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 7.921189286394596, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": -1.5018115043640137, |
|
"logits/rejected": -1.5035383701324463, |
|
"logps/chosen": -435.9285583496094, |
|
"logps/rejected": -535.82275390625, |
|
"loss": 0.5308, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5959597826004028, |
|
"rewards/margins": 1.0554875135421753, |
|
"rewards/rejected": -2.651447057723999, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 9.780614080293216, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": -1.4760569334030151, |
|
"logits/rejected": -1.4197697639465332, |
|
"logps/chosen": -389.590087890625, |
|
"logps/rejected": -585.2882690429688, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.401724100112915, |
|
"rewards/margins": 1.7929754257202148, |
|
"rewards/rejected": -3.1946990489959717, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 8.139127977792159, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": -1.5709788799285889, |
|
"logits/rejected": -1.4670157432556152, |
|
"logps/chosen": -426.033203125, |
|
"logps/rejected": -606.0979614257812, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.373948335647583, |
|
"rewards/margins": 1.9848015308380127, |
|
"rewards/rejected": -3.358750104904175, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 8.460928737411663, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": -1.550615668296814, |
|
"logits/rejected": -1.5216505527496338, |
|
"logps/chosen": -427.553955078125, |
|
"logps/rejected": -545.8343505859375, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3485326766967773, |
|
"rewards/margins": 1.4248372316360474, |
|
"rewards/rejected": -2.773369789123535, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 8.81270929184022, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": -1.5789110660552979, |
|
"logits/rejected": -1.5154634714126587, |
|
"logps/chosen": -434.64849853515625, |
|
"logps/rejected": -600.957763671875, |
|
"loss": 0.5008, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3257498741149902, |
|
"rewards/margins": 1.8993675708770752, |
|
"rewards/rejected": -3.2251172065734863, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 10.770138588745654, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": -1.4905095100402832, |
|
"logits/rejected": -1.4610494375228882, |
|
"logps/chosen": -411.388427734375, |
|
"logps/rejected": -600.2252807617188, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6003555059432983, |
|
"rewards/margins": 1.7819187641143799, |
|
"rewards/rejected": -3.3822741508483887, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 7.293720682350634, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": -1.4742928743362427, |
|
"logits/rejected": -1.4396989345550537, |
|
"logps/chosen": -440.1182556152344, |
|
"logps/rejected": -565.8653564453125, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.718984603881836, |
|
"rewards/margins": 1.4051283597946167, |
|
"rewards/rejected": -3.124112606048584, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 12.227998192379331, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -1.5375521183013916, |
|
"logits/rejected": -1.4570263624191284, |
|
"logps/chosen": -437.3187561035156, |
|
"logps/rejected": -679.8615112304688, |
|
"loss": 0.5442, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.817325234413147, |
|
"rewards/margins": 2.153637409210205, |
|
"rewards/rejected": -3.9709632396698, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 8.40519372330152, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": -1.6291801929473877, |
|
"logits/rejected": -1.6403745412826538, |
|
"logps/chosen": -378.547119140625, |
|
"logps/rejected": -465.722412109375, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5566104650497437, |
|
"rewards/margins": 0.5313079953193665, |
|
"rewards/rejected": -2.087918519973755, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 7.576024138002559, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": -1.4507944583892822, |
|
"logits/rejected": -1.3628849983215332, |
|
"logps/chosen": -424.55401611328125, |
|
"logps/rejected": -754.7704467773438, |
|
"loss": 0.5183, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7864032983779907, |
|
"rewards/margins": 3.053657293319702, |
|
"rewards/rejected": -4.840060710906982, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 9.120638907797822, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": -1.6330862045288086, |
|
"logits/rejected": -1.5513877868652344, |
|
"logps/chosen": -524.4133911132812, |
|
"logps/rejected": -615.8099975585938, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.665891408920288, |
|
"rewards/margins": 1.4321410655975342, |
|
"rewards/rejected": -3.0980327129364014, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 8.831999384046886, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": -1.5293561220169067, |
|
"logits/rejected": -1.4742053747177124, |
|
"logps/chosen": -455.7305603027344, |
|
"logps/rejected": -572.6296997070312, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.669014573097229, |
|
"rewards/margins": 1.1838295459747314, |
|
"rewards/rejected": -2.852843999862671, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 8.125729954588683, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": -1.5556094646453857, |
|
"logits/rejected": -1.544398307800293, |
|
"logps/chosen": -411.40631103515625, |
|
"logps/rejected": -525.8662719726562, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1296329498291016, |
|
"rewards/margins": 1.2118812799453735, |
|
"rewards/rejected": -2.3415141105651855, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 7.32928329168995, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": -1.5943670272827148, |
|
"logits/rejected": -1.550929307937622, |
|
"logps/chosen": -381.600830078125, |
|
"logps/rejected": -475.3746643066406, |
|
"loss": 0.5565, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4997680187225342, |
|
"rewards/margins": 1.1621274948120117, |
|
"rewards/rejected": -2.661895513534546, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 8.147323008767865, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": -1.5589003562927246, |
|
"logits/rejected": -1.6057322025299072, |
|
"logps/chosen": -448.15496826171875, |
|
"logps/rejected": -532.5565185546875, |
|
"loss": 0.5517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.489591121673584, |
|
"rewards/margins": 0.731250524520874, |
|
"rewards/rejected": -2.220841646194458, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 7.889436233563884, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": -1.482542872428894, |
|
"logits/rejected": -1.3949836492538452, |
|
"logps/chosen": -495.69140625, |
|
"logps/rejected": -660.7445678710938, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8629268407821655, |
|
"rewards/margins": 2.0722007751464844, |
|
"rewards/rejected": -3.9351277351379395, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 10.641539880656538, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": -1.554213285446167, |
|
"logits/rejected": -1.5207319259643555, |
|
"logps/chosen": -438.8204650878906, |
|
"logps/rejected": -519.6439208984375, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5820989608764648, |
|
"rewards/margins": 0.9530105590820312, |
|
"rewards/rejected": -2.535109281539917, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 8.725526436669323, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": -1.5051288604736328, |
|
"logits/rejected": -1.4673130512237549, |
|
"logps/chosen": -474.5401306152344, |
|
"logps/rejected": -572.7054443359375, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5164769887924194, |
|
"rewards/margins": 1.4391937255859375, |
|
"rewards/rejected": -2.9556708335876465, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 19.17908292592815, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": -1.5526840686798096, |
|
"logits/rejected": -1.5369585752487183, |
|
"logps/chosen": -372.50921630859375, |
|
"logps/rejected": -465.1087951660156, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3534399271011353, |
|
"rewards/margins": 0.4993162155151367, |
|
"rewards/rejected": -1.852756142616272, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 7.678163461265528, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": -1.5254640579223633, |
|
"logits/rejected": -1.5481865406036377, |
|
"logps/chosen": -403.49542236328125, |
|
"logps/rejected": -521.7468872070312, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5229865312576294, |
|
"rewards/margins": 1.0962460041046143, |
|
"rewards/rejected": -2.619232416152954, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 6.910401313727213, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": -1.4570752382278442, |
|
"logits/rejected": -1.3976576328277588, |
|
"logps/chosen": -587.8316040039062, |
|
"logps/rejected": -922.5247802734375, |
|
"loss": 0.4951, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9414281845092773, |
|
"rewards/margins": 3.3815834522247314, |
|
"rewards/rejected": -6.323011875152588, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 10.822868640544474, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": -1.605613112449646, |
|
"logits/rejected": -1.5582932233810425, |
|
"logps/chosen": -432.2767028808594, |
|
"logps/rejected": -528.9737548828125, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6335880756378174, |
|
"rewards/margins": 1.2116552591323853, |
|
"rewards/rejected": -2.845243215560913, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 9.531595564715918, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": -1.5569547414779663, |
|
"logits/rejected": -1.5293956995010376, |
|
"logps/chosen": -408.19317626953125, |
|
"logps/rejected": -679.7545166015625, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.506165862083435, |
|
"rewards/margins": 2.2621097564697266, |
|
"rewards/rejected": -3.768275499343872, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 9.016181079029822, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": -1.5378257036209106, |
|
"logits/rejected": -1.5579806566238403, |
|
"logps/chosen": -399.8911437988281, |
|
"logps/rejected": -520.0226440429688, |
|
"loss": 0.5094, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.619594931602478, |
|
"rewards/margins": 1.0523192882537842, |
|
"rewards/rejected": -2.6719141006469727, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 9.569743956165809, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": -1.5677305459976196, |
|
"logits/rejected": -1.5042650699615479, |
|
"logps/chosen": -488.653564453125, |
|
"logps/rejected": -553.4172973632812, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8398549556732178, |
|
"rewards/margins": 1.1457983255386353, |
|
"rewards/rejected": -2.9856534004211426, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 7.953020161460846, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": -1.5813748836517334, |
|
"logits/rejected": -1.5757431983947754, |
|
"logps/chosen": -361.91192626953125, |
|
"logps/rejected": -377.3770446777344, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.096151351928711, |
|
"rewards/margins": 0.5215452313423157, |
|
"rewards/rejected": -1.6176965236663818, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 12.112559070938909, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": -1.661161184310913, |
|
"logits/rejected": -1.604376196861267, |
|
"logps/chosen": -426.1370544433594, |
|
"logps/rejected": -517.6278076171875, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.2490683794021606, |
|
"rewards/margins": 1.1713922023773193, |
|
"rewards/rejected": -2.4204607009887695, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 10.613017352814472, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": -1.477107286453247, |
|
"logits/rejected": -1.435372233390808, |
|
"logps/chosen": -473.62750244140625, |
|
"logps/rejected": -581.3477172851562, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0801384449005127, |
|
"rewards/margins": 1.3069989681243896, |
|
"rewards/rejected": -3.3871371746063232, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 8.03177266316891, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": -1.4891306161880493, |
|
"logits/rejected": -1.524371862411499, |
|
"logps/chosen": -451.3934631347656, |
|
"logps/rejected": -560.3739013671875, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.225372314453125, |
|
"rewards/margins": 0.9865644574165344, |
|
"rewards/rejected": -3.2119364738464355, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 14.176933508927005, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": -1.4202697277069092, |
|
"logits/rejected": -1.3632880449295044, |
|
"logps/chosen": -510.245361328125, |
|
"logps/rejected": -665.1488037109375, |
|
"loss": 0.4931, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.517984628677368, |
|
"rewards/margins": 1.563971757888794, |
|
"rewards/rejected": -4.081956386566162, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 10.323789347042394, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": -1.5507957935333252, |
|
"logits/rejected": -1.4645434617996216, |
|
"logps/chosen": -424.00042724609375, |
|
"logps/rejected": -677.9902954101562, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6596157550811768, |
|
"rewards/margins": 2.5375232696533203, |
|
"rewards/rejected": -4.197138786315918, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 13.602368967903537, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": -1.5984694957733154, |
|
"logits/rejected": -1.5915277004241943, |
|
"logps/chosen": -382.999755859375, |
|
"logps/rejected": -552.7100830078125, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.413910150527954, |
|
"rewards/margins": 1.5153096914291382, |
|
"rewards/rejected": -2.9292197227478027, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 7.502258208680551, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": -1.6568260192871094, |
|
"logits/rejected": -1.5876600742340088, |
|
"logps/chosen": -440.16357421875, |
|
"logps/rejected": -474.075439453125, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4147294759750366, |
|
"rewards/margins": 0.780114471912384, |
|
"rewards/rejected": -2.1948440074920654, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 8.66494523712904, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": -1.562082052230835, |
|
"logits/rejected": -1.4948246479034424, |
|
"logps/chosen": -393.52508544921875, |
|
"logps/rejected": -548.5087280273438, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3889000415802002, |
|
"rewards/margins": 1.800432562828064, |
|
"rewards/rejected": -3.1893324851989746, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 8.12061625541492, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": -1.6216672658920288, |
|
"logits/rejected": -1.6438817977905273, |
|
"logps/chosen": -408.8639221191406, |
|
"logps/rejected": -543.5513916015625, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3934214115142822, |
|
"rewards/margins": 0.9635807275772095, |
|
"rewards/rejected": -2.357002019882202, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 7.83467861319349, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": -1.6941850185394287, |
|
"logits/rejected": -1.6553080081939697, |
|
"logps/chosen": -461.6195373535156, |
|
"logps/rejected": -582.6620483398438, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6303510665893555, |
|
"rewards/margins": 0.9588174819946289, |
|
"rewards/rejected": -2.5891685485839844, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 7.244089308316078, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": -1.5541958808898926, |
|
"logits/rejected": -1.5294215679168701, |
|
"logps/chosen": -429.1705017089844, |
|
"logps/rejected": -555.658447265625, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.70223867893219, |
|
"rewards/margins": 1.1579748392105103, |
|
"rewards/rejected": -2.8602135181427, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 22.58474812752272, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": -1.4961183071136475, |
|
"logits/rejected": -1.4009946584701538, |
|
"logps/chosen": -451.9063415527344, |
|
"logps/rejected": -685.244140625, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.072859764099121, |
|
"rewards/margins": 2.2618536949157715, |
|
"rewards/rejected": -4.334712982177734, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 8.32162712301176, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": -1.5286868810653687, |
|
"logits/rejected": -1.387584924697876, |
|
"logps/chosen": -498.414306640625, |
|
"logps/rejected": -741.4631958007812, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.277275800704956, |
|
"rewards/margins": 2.3793492317199707, |
|
"rewards/rejected": -4.656625747680664, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 8.974293009829895, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": -1.6080338954925537, |
|
"logits/rejected": -1.4449987411499023, |
|
"logps/chosen": -483.98486328125, |
|
"logps/rejected": -583.08056640625, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6723836660385132, |
|
"rewards/margins": 1.4705579280853271, |
|
"rewards/rejected": -3.1429412364959717, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 9.046062404092567, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": -1.6072918176651, |
|
"logits/rejected": -1.5334550142288208, |
|
"logps/chosen": -404.91387939453125, |
|
"logps/rejected": -659.3243408203125, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4262123107910156, |
|
"rewards/margins": 2.3509132862091064, |
|
"rewards/rejected": -3.777125835418701, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 7.551979886612425, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": -1.5570323467254639, |
|
"logits/rejected": -1.5010316371917725, |
|
"logps/chosen": -499.6627502441406, |
|
"logps/rejected": -655.69775390625, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.816043496131897, |
|
"rewards/margins": 2.008720636367798, |
|
"rewards/rejected": -3.8247642517089844, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 8.307725059400688, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": -1.5870680809020996, |
|
"logits/rejected": -1.5261785984039307, |
|
"logps/chosen": -484.54364013671875, |
|
"logps/rejected": -549.2987060546875, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8930203914642334, |
|
"rewards/margins": 1.094689965248108, |
|
"rewards/rejected": -2.987710475921631, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 8.582762739992743, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": -1.580644130706787, |
|
"logits/rejected": -1.3994085788726807, |
|
"logps/chosen": -401.8323059082031, |
|
"logps/rejected": -603.8817138671875, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.661513328552246, |
|
"rewards/margins": 2.428684949874878, |
|
"rewards/rejected": -4.090198516845703, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 10.93719877269778, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": -1.607084035873413, |
|
"logits/rejected": -1.5272046327590942, |
|
"logps/chosen": -510.1648864746094, |
|
"logps/rejected": -667.7628173828125, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0123143196105957, |
|
"rewards/margins": 2.094109535217285, |
|
"rewards/rejected": -4.106423377990723, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 8.551853777479481, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": -1.6525242328643799, |
|
"logits/rejected": -1.6364984512329102, |
|
"logps/chosen": -405.4945983886719, |
|
"logps/rejected": -535.1358032226562, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5821750164031982, |
|
"rewards/margins": 1.2321652173995972, |
|
"rewards/rejected": -2.814340114593506, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 7.385716657081044, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": -1.517671823501587, |
|
"logits/rejected": -1.4017354249954224, |
|
"logps/chosen": -517.3212890625, |
|
"logps/rejected": -859.2703247070312, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.307370662689209, |
|
"rewards/margins": 3.0285544395446777, |
|
"rewards/rejected": -5.335925102233887, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 9.147598141998557, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": -1.603632926940918, |
|
"logits/rejected": -1.50962233543396, |
|
"logps/chosen": -405.5295715332031, |
|
"logps/rejected": -603.0396728515625, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5191326141357422, |
|
"rewards/margins": 2.0283052921295166, |
|
"rewards/rejected": -3.5474376678466797, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 10.988746391986139, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": -1.5768574476242065, |
|
"logits/rejected": -1.5998786687850952, |
|
"logps/chosen": -428.5262756347656, |
|
"logps/rejected": -487.67987060546875, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3609421253204346, |
|
"rewards/margins": 0.9485089182853699, |
|
"rewards/rejected": -2.309451103210449, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 15.944164556320738, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": -1.625605583190918, |
|
"logits/rejected": -1.5874695777893066, |
|
"logps/chosen": -494.60919189453125, |
|
"logps/rejected": -607.7193603515625, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8808681964874268, |
|
"rewards/margins": 1.5208300352096558, |
|
"rewards/rejected": -3.401698350906372, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 8.268100089151192, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": -1.6645658016204834, |
|
"logits/rejected": -1.648751974105835, |
|
"logps/chosen": -499.26336669921875, |
|
"logps/rejected": -698.3704833984375, |
|
"loss": 0.4915, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9141197204589844, |
|
"rewards/margins": 1.823472261428833, |
|
"rewards/rejected": -3.7375919818878174, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 8.804078070830371, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": -1.5872663259506226, |
|
"logits/rejected": -1.5196198225021362, |
|
"logps/chosen": -456.85992431640625, |
|
"logps/rejected": -658.8201904296875, |
|
"loss": 0.4869, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.035572052001953, |
|
"rewards/margins": 1.8471441268920898, |
|
"rewards/rejected": -3.882716417312622, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 11.092976917744208, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": -1.5509458780288696, |
|
"logits/rejected": -1.4662238359451294, |
|
"logps/chosen": -485.05230712890625, |
|
"logps/rejected": -712.2886962890625, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6332812309265137, |
|
"rewards/margins": 2.179011821746826, |
|
"rewards/rejected": -4.81229305267334, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 12.839651291896095, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": -1.532083511352539, |
|
"logits/rejected": -1.428544282913208, |
|
"logps/chosen": -468.669921875, |
|
"logps/rejected": -751.4425048828125, |
|
"loss": 0.5366, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.547771692276001, |
|
"rewards/margins": 2.576319932937622, |
|
"rewards/rejected": -5.124091148376465, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 10.082888820884584, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": -1.407777190208435, |
|
"logits/rejected": -1.3572627305984497, |
|
"logps/chosen": -552.7352294921875, |
|
"logps/rejected": -779.3690185546875, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7334177494049072, |
|
"rewards/margins": 2.2001953125, |
|
"rewards/rejected": -4.93361234664917, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 8.22503563553893, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": -1.5405181646347046, |
|
"logits/rejected": -1.3945229053497314, |
|
"logps/chosen": -509.95916748046875, |
|
"logps/rejected": -725.9969482421875, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.202409267425537, |
|
"rewards/margins": 2.3135194778442383, |
|
"rewards/rejected": -4.515929222106934, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 11.990528680107209, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": -1.4919646978378296, |
|
"logits/rejected": -1.400061845779419, |
|
"logps/chosen": -584.2794189453125, |
|
"logps/rejected": -807.6492919921875, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.5771946907043457, |
|
"rewards/margins": 2.1979637145996094, |
|
"rewards/rejected": -4.775158882141113, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 11.277957093878648, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": -1.4834940433502197, |
|
"logits/rejected": -1.4324702024459839, |
|
"logps/chosen": -476.18499755859375, |
|
"logps/rejected": -635.02783203125, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0429248809814453, |
|
"rewards/margins": 1.4896948337554932, |
|
"rewards/rejected": -3.5326199531555176, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 15.301957388985223, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": -1.6484432220458984, |
|
"logits/rejected": -1.6361116170883179, |
|
"logps/chosen": -451.64276123046875, |
|
"logps/rejected": -508.2003479003906, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6862659454345703, |
|
"rewards/margins": 0.613060712814331, |
|
"rewards/rejected": -2.2993264198303223, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 9.017778030476915, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": -1.6414830684661865, |
|
"logits/rejected": -1.564360499382019, |
|
"logps/chosen": -421.6163024902344, |
|
"logps/rejected": -599.665283203125, |
|
"loss": 0.4565, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4122569561004639, |
|
"rewards/margins": 1.8497397899627686, |
|
"rewards/rejected": -3.2619965076446533, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 13.966059992110349, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": -1.4791144132614136, |
|
"logits/rejected": -1.4647550582885742, |
|
"logps/chosen": -573.87353515625, |
|
"logps/rejected": -760.268798828125, |
|
"loss": 0.4637, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.4180257320404053, |
|
"rewards/margins": 1.517680287361145, |
|
"rewards/rejected": -3.935706377029419, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 14.922819489110463, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": -1.5097310543060303, |
|
"logits/rejected": -1.4952431917190552, |
|
"logps/chosen": -426.15045166015625, |
|
"logps/rejected": -578.3543090820312, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8940229415893555, |
|
"rewards/margins": 1.3460915088653564, |
|
"rewards/rejected": -3.240114212036133, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 8.988836306290604, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": -1.6298195123672485, |
|
"logits/rejected": -1.6404377222061157, |
|
"logps/chosen": -538.1989135742188, |
|
"logps/rejected": -648.9851684570312, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.212888240814209, |
|
"rewards/margins": 1.0854294300079346, |
|
"rewards/rejected": -3.2983174324035645, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 8.750744135615523, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": -1.643333077430725, |
|
"logits/rejected": -1.610515832901001, |
|
"logps/chosen": -407.2403869628906, |
|
"logps/rejected": -473.98858642578125, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2611894607543945, |
|
"rewards/margins": 1.0861375331878662, |
|
"rewards/rejected": -2.3473267555236816, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": -1.5855597257614136, |
|
"eval_logits/rejected": -1.54659903049469, |
|
"eval_logps/chosen": -422.9024963378906, |
|
"eval_logps/rejected": -588.136474609375, |
|
"eval_loss": 0.4988311231136322, |
|
"eval_rewards/accuracies": 0.7982142567634583, |
|
"eval_rewards/chosen": -1.505989670753479, |
|
"eval_rewards/margins": 1.6387678384780884, |
|
"eval_rewards/rejected": -3.1447572708129883, |
|
"eval_runtime": 52.378, |
|
"eval_samples_per_second": 85.169, |
|
"eval_steps_per_second": 1.336, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 13.733129427049343, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": -1.6123840808868408, |
|
"logits/rejected": -1.5536229610443115, |
|
"logps/chosen": -480.535888671875, |
|
"logps/rejected": -624.6920166015625, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8841956853866577, |
|
"rewards/margins": 1.7770442962646484, |
|
"rewards/rejected": -3.6612396240234375, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 9.057968429745728, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": -1.6468747854232788, |
|
"logits/rejected": -1.5576668977737427, |
|
"logps/chosen": -458.7217712402344, |
|
"logps/rejected": -609.1342163085938, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5887041091918945, |
|
"rewards/margins": 1.9728997945785522, |
|
"rewards/rejected": -3.5616040229797363, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 9.014043112611292, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": -1.602269172668457, |
|
"logits/rejected": -1.5746511220932007, |
|
"logps/chosen": -426.79180908203125, |
|
"logps/rejected": -550.3716430664062, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.901236891746521, |
|
"rewards/margins": 1.115099310874939, |
|
"rewards/rejected": -3.016335964202881, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 10.239650017211535, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": -1.537592887878418, |
|
"logits/rejected": -1.5135307312011719, |
|
"logps/chosen": -394.8808898925781, |
|
"logps/rejected": -632.0445556640625, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3750698566436768, |
|
"rewards/margins": 2.4639463424682617, |
|
"rewards/rejected": -3.8390164375305176, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 15.750043165793306, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": -1.6159226894378662, |
|
"logits/rejected": -1.655556321144104, |
|
"logps/chosen": -525.2373657226562, |
|
"logps/rejected": -622.0975341796875, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.482973098754883, |
|
"rewards/margins": 0.6949129700660706, |
|
"rewards/rejected": -3.1778860092163086, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 8.499109074782012, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": -1.5946094989776611, |
|
"logits/rejected": -1.534623622894287, |
|
"logps/chosen": -453.0174255371094, |
|
"logps/rejected": -686.3258056640625, |
|
"loss": 0.521, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9426515102386475, |
|
"rewards/margins": 2.2058510780334473, |
|
"rewards/rejected": -4.148502349853516, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 10.800111792066579, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": -1.6709175109863281, |
|
"logits/rejected": -1.6887776851654053, |
|
"logps/chosen": -438.7301330566406, |
|
"logps/rejected": -554.9039306640625, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6398264169692993, |
|
"rewards/margins": 1.2201743125915527, |
|
"rewards/rejected": -2.8600010871887207, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 8.988674035333007, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": -1.5756020545959473, |
|
"logits/rejected": -1.5436617136001587, |
|
"logps/chosen": -561.7062377929688, |
|
"logps/rejected": -795.3497314453125, |
|
"loss": 0.4775, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4821224212646484, |
|
"rewards/margins": 2.639726161956787, |
|
"rewards/rejected": -5.121848106384277, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 10.085891400781119, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": -1.6471210718154907, |
|
"logits/rejected": -1.6008189916610718, |
|
"logps/chosen": -478.99835205078125, |
|
"logps/rejected": -686.2216186523438, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7925002574920654, |
|
"rewards/margins": 1.8971786499023438, |
|
"rewards/rejected": -3.689678192138672, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 10.421882065221322, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": -1.6900784969329834, |
|
"logits/rejected": -1.6556028127670288, |
|
"logps/chosen": -423.0419921875, |
|
"logps/rejected": -528.6862182617188, |
|
"loss": 0.5545, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5560649633407593, |
|
"rewards/margins": 0.938676655292511, |
|
"rewards/rejected": -2.494741916656494, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 8.821766288439926, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": -1.662415862083435, |
|
"logits/rejected": -1.6548817157745361, |
|
"logps/chosen": -453.2157287597656, |
|
"logps/rejected": -591.32470703125, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6261285543441772, |
|
"rewards/margins": 1.4008034467697144, |
|
"rewards/rejected": -3.0269322395324707, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 12.186841933449609, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": -1.5856435298919678, |
|
"logits/rejected": -1.5153669118881226, |
|
"logps/chosen": -424.302490234375, |
|
"logps/rejected": -558.4691162109375, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5989717245101929, |
|
"rewards/margins": 1.3942369222640991, |
|
"rewards/rejected": -2.993208646774292, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 9.950542312444133, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": -1.58291494846344, |
|
"logits/rejected": -1.506280541419983, |
|
"logps/chosen": -473.59130859375, |
|
"logps/rejected": -741.8995361328125, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1417229175567627, |
|
"rewards/margins": 2.387235641479492, |
|
"rewards/rejected": -4.528958320617676, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 9.848738060883088, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": -1.5625739097595215, |
|
"logits/rejected": -1.5479421615600586, |
|
"logps/chosen": -466.6238708496094, |
|
"logps/rejected": -644.1817626953125, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0364062786102295, |
|
"rewards/margins": 1.7278406620025635, |
|
"rewards/rejected": -3.764247417449951, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 15.060228171309086, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": -1.6554969549179077, |
|
"logits/rejected": -1.590504765510559, |
|
"logps/chosen": -499.38653564453125, |
|
"logps/rejected": -656.7408447265625, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2184956073760986, |
|
"rewards/margins": 1.8517191410064697, |
|
"rewards/rejected": -4.07021427154541, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 11.311863584668101, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": -1.6083641052246094, |
|
"logits/rejected": -1.4705779552459717, |
|
"logps/chosen": -442.3028869628906, |
|
"logps/rejected": -744.4788818359375, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9584490060806274, |
|
"rewards/margins": 3.1203114986419678, |
|
"rewards/rejected": -5.078760623931885, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 10.28461247325414, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": -1.7539141178131104, |
|
"logits/rejected": -1.7315905094146729, |
|
"logps/chosen": -480.287353515625, |
|
"logps/rejected": -616.8678588867188, |
|
"loss": 0.5098, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0116958618164062, |
|
"rewards/margins": 1.2387454509735107, |
|
"rewards/rejected": -3.250441312789917, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 9.31477755841552, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": -1.5500714778900146, |
|
"logits/rejected": -1.497689962387085, |
|
"logps/chosen": -472.05908203125, |
|
"logps/rejected": -784.3190307617188, |
|
"loss": 0.5352, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0980050563812256, |
|
"rewards/margins": 2.975919485092163, |
|
"rewards/rejected": -5.073924541473389, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 9.270464819675132, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": -1.610926866531372, |
|
"logits/rejected": -1.4779975414276123, |
|
"logps/chosen": -438.76495361328125, |
|
"logps/rejected": -772.690673828125, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.654784917831421, |
|
"rewards/margins": 3.3569869995117188, |
|
"rewards/rejected": -5.011772632598877, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 11.881300504373025, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": -1.6387016773223877, |
|
"logits/rejected": -1.5379244089126587, |
|
"logps/chosen": -487.00250244140625, |
|
"logps/rejected": -804.3506469726562, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.203137159347534, |
|
"rewards/margins": 3.4220173358917236, |
|
"rewards/rejected": -5.625154495239258, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 11.209078276132704, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": -1.7330009937286377, |
|
"logits/rejected": -1.610637903213501, |
|
"logps/chosen": -492.36883544921875, |
|
"logps/rejected": -618.7637329101562, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8613355159759521, |
|
"rewards/margins": 1.652645468711853, |
|
"rewards/rejected": -3.5139803886413574, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 18.552236297372197, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": -1.6331803798675537, |
|
"logits/rejected": -1.648450493812561, |
|
"logps/chosen": -422.33184814453125, |
|
"logps/rejected": -502.978271484375, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5346616506576538, |
|
"rewards/margins": 0.6287662386894226, |
|
"rewards/rejected": -2.1634278297424316, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 21.4582048998974, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": -1.5773918628692627, |
|
"logits/rejected": -1.5001894235610962, |
|
"logps/chosen": -385.9847412109375, |
|
"logps/rejected": -727.9683837890625, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4480630159378052, |
|
"rewards/margins": 3.2581539154052734, |
|
"rewards/rejected": -4.706217288970947, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 26.843224623928677, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": -1.6596715450286865, |
|
"logits/rejected": -1.5660836696624756, |
|
"logps/chosen": -512.3265380859375, |
|
"logps/rejected": -711.9317016601562, |
|
"loss": 0.4656, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2569899559020996, |
|
"rewards/margins": 1.9923429489135742, |
|
"rewards/rejected": -4.249333381652832, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 10.63742659021482, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": -1.7071069478988647, |
|
"logits/rejected": -1.6586761474609375, |
|
"logps/chosen": -430.1871643066406, |
|
"logps/rejected": -572.591064453125, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8596538305282593, |
|
"rewards/margins": 1.5033780336380005, |
|
"rewards/rejected": -3.363032102584839, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 14.567934039227358, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": -1.5422347784042358, |
|
"logits/rejected": -1.4246468544006348, |
|
"logps/chosen": -479.8147888183594, |
|
"logps/rejected": -750.625, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.149322986602783, |
|
"rewards/margins": 2.762144088745117, |
|
"rewards/rejected": -4.911467552185059, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 17.356455187998073, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": -1.5986369848251343, |
|
"logits/rejected": -1.5308504104614258, |
|
"logps/chosen": -437.1161193847656, |
|
"logps/rejected": -754.64453125, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.717659592628479, |
|
"rewards/margins": 3.181501626968384, |
|
"rewards/rejected": -4.899160861968994, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 9.28953605758801, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": -1.5438369512557983, |
|
"logits/rejected": -1.5630390644073486, |
|
"logps/chosen": -416.0315856933594, |
|
"logps/rejected": -573.1238403320312, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.984635591506958, |
|
"rewards/margins": 1.3827749490737915, |
|
"rewards/rejected": -3.367410182952881, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 8.596639384497214, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": -1.6020433902740479, |
|
"logits/rejected": -1.558334469795227, |
|
"logps/chosen": -431.74029541015625, |
|
"logps/rejected": -551.8709106445312, |
|
"loss": 0.4808, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8813968896865845, |
|
"rewards/margins": 1.2201679944992065, |
|
"rewards/rejected": -3.10156512260437, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 11.753794107565938, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": -1.5615367889404297, |
|
"logits/rejected": -1.5330339670181274, |
|
"logps/chosen": -437.43084716796875, |
|
"logps/rejected": -621.0929565429688, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.09666109085083, |
|
"rewards/margins": 1.3752198219299316, |
|
"rewards/rejected": -3.4718806743621826, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 10.071023503443175, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": -1.6465524435043335, |
|
"logits/rejected": -1.646805763244629, |
|
"logps/chosen": -486.73565673828125, |
|
"logps/rejected": -566.8058471679688, |
|
"loss": 0.5368, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7878824472427368, |
|
"rewards/margins": 1.157956838607788, |
|
"rewards/rejected": -2.9458394050598145, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 11.942512663088241, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": -1.6439406871795654, |
|
"logits/rejected": -1.6925067901611328, |
|
"logps/chosen": -371.7730407714844, |
|
"logps/rejected": -524.0623168945312, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.3829593658447266, |
|
"rewards/margins": 0.9715530276298523, |
|
"rewards/rejected": -2.3545122146606445, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 12.373131204862757, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": -1.5384362936019897, |
|
"logits/rejected": -1.480912446975708, |
|
"logps/chosen": -452.34100341796875, |
|
"logps/rejected": -508.7037048339844, |
|
"loss": 0.5744, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7134408950805664, |
|
"rewards/margins": 1.0949541330337524, |
|
"rewards/rejected": -2.808394432067871, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 12.389004819032923, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": -1.5702178478240967, |
|
"logits/rejected": -1.5472772121429443, |
|
"logps/chosen": -392.81146240234375, |
|
"logps/rejected": -411.69317626953125, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4531047344207764, |
|
"rewards/margins": 0.3802509605884552, |
|
"rewards/rejected": -1.8333555459976196, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 10.648864731231448, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": -1.6462970972061157, |
|
"logits/rejected": -1.6218713521957397, |
|
"logps/chosen": -369.7726745605469, |
|
"logps/rejected": -520.507080078125, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6180322170257568, |
|
"rewards/margins": 1.4091498851776123, |
|
"rewards/rejected": -3.027182102203369, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 21.383140181787045, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": -1.6451104879379272, |
|
"logits/rejected": -1.5843571424484253, |
|
"logps/chosen": -529.2950439453125, |
|
"logps/rejected": -677.9429321289062, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.010671615600586, |
|
"rewards/margins": 1.9736932516098022, |
|
"rewards/rejected": -3.9843647480010986, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 11.060278026039455, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": -1.6365985870361328, |
|
"logits/rejected": -1.6005491018295288, |
|
"logps/chosen": -461.49749755859375, |
|
"logps/rejected": -656.2427978515625, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6665138006210327, |
|
"rewards/margins": 1.8031669855117798, |
|
"rewards/rejected": -3.4696803092956543, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 12.434301470105313, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": -1.5425972938537598, |
|
"logits/rejected": -1.5300174951553345, |
|
"logps/chosen": -429.36444091796875, |
|
"logps/rejected": -472.57568359375, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7053802013397217, |
|
"rewards/margins": 1.1300846338272095, |
|
"rewards/rejected": -2.8354649543762207, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 11.159233566131416, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": -1.4974268674850464, |
|
"logits/rejected": -1.412022590637207, |
|
"logps/chosen": -410.587890625, |
|
"logps/rejected": -676.4757690429688, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8208236694335938, |
|
"rewards/margins": 2.8905091285705566, |
|
"rewards/rejected": -4.71133279800415, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 8.941122859434415, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": -1.5879501104354858, |
|
"logits/rejected": -1.538260579109192, |
|
"logps/chosen": -387.39886474609375, |
|
"logps/rejected": -541.6805419921875, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7686420679092407, |
|
"rewards/margins": 1.6267540454864502, |
|
"rewards/rejected": -3.3953964710235596, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 12.044861445889701, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": -1.5681109428405762, |
|
"logits/rejected": -1.5912139415740967, |
|
"logps/chosen": -399.2314453125, |
|
"logps/rejected": -558.3780517578125, |
|
"loss": 0.4731, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4787284135818481, |
|
"rewards/margins": 1.385317325592041, |
|
"rewards/rejected": -2.8640456199645996, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 14.345741676788231, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": -1.4331409931182861, |
|
"logits/rejected": -1.3503481149673462, |
|
"logps/chosen": -473.1424255371094, |
|
"logps/rejected": -837.6438598632812, |
|
"loss": 0.4977, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4872748851776123, |
|
"rewards/margins": 3.6537222862243652, |
|
"rewards/rejected": -6.140997409820557, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 13.4432658994951, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": -1.574540138244629, |
|
"logits/rejected": -1.505368709564209, |
|
"logps/chosen": -423.1871032714844, |
|
"logps/rejected": -710.91845703125, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.972616195678711, |
|
"rewards/margins": 2.7154417037963867, |
|
"rewards/rejected": -4.6880574226379395, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 11.856242883233412, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": -1.5872819423675537, |
|
"logits/rejected": -1.4878933429718018, |
|
"logps/chosen": -429.8773498535156, |
|
"logps/rejected": -675.3303833007812, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9643665552139282, |
|
"rewards/margins": 2.4596877098083496, |
|
"rewards/rejected": -4.424054145812988, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 12.96744179023995, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": -1.5757300853729248, |
|
"logits/rejected": -1.498997449874878, |
|
"logps/chosen": -521.7322998046875, |
|
"logps/rejected": -615.30712890625, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.17085337638855, |
|
"rewards/margins": 1.284227967262268, |
|
"rewards/rejected": -3.4550812244415283, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 10.542916107819408, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": -1.6688730716705322, |
|
"logits/rejected": -1.6244138479232788, |
|
"logps/chosen": -413.05352783203125, |
|
"logps/rejected": -669.3853149414062, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.667035698890686, |
|
"rewards/margins": 2.1492269039154053, |
|
"rewards/rejected": -3.8162624835968018, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 13.515470462935513, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": -1.5288382768630981, |
|
"logits/rejected": -1.4609134197235107, |
|
"logps/chosen": -422.8729553222656, |
|
"logps/rejected": -724.0157470703125, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.968060851097107, |
|
"rewards/margins": 2.4587647914886475, |
|
"rewards/rejected": -4.426825523376465, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 14.338489620286177, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": -1.5649030208587646, |
|
"logits/rejected": -1.5623984336853027, |
|
"logps/chosen": -399.79290771484375, |
|
"logps/rejected": -583.4332275390625, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5557973384857178, |
|
"rewards/margins": 1.7626543045043945, |
|
"rewards/rejected": -3.3184516429901123, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 17.510238929607876, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": -1.6265462636947632, |
|
"logits/rejected": -1.598730444908142, |
|
"logps/chosen": -497.2835998535156, |
|
"logps/rejected": -648.0611572265625, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.131727695465088, |
|
"rewards/margins": 1.5764387845993042, |
|
"rewards/rejected": -3.7081668376922607, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 11.33500534140664, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": -1.4689067602157593, |
|
"logits/rejected": -1.453774094581604, |
|
"logps/chosen": -439.34075927734375, |
|
"logps/rejected": -537.5209350585938, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8917917013168335, |
|
"rewards/margins": 1.3580577373504639, |
|
"rewards/rejected": -3.249849319458008, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 13.216366762387246, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": -1.578136920928955, |
|
"logits/rejected": -1.5401719808578491, |
|
"logps/chosen": -500.56072998046875, |
|
"logps/rejected": -603.341796875, |
|
"loss": 0.4649, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2012569904327393, |
|
"rewards/margins": 1.514261245727539, |
|
"rewards/rejected": -3.71551775932312, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 13.494163188740151, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": -1.5589288473129272, |
|
"logits/rejected": -1.4520585536956787, |
|
"logps/chosen": -436.919189453125, |
|
"logps/rejected": -646.716796875, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0600430965423584, |
|
"rewards/margins": 2.0313327312469482, |
|
"rewards/rejected": -4.091375827789307, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 13.27727238694429, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": -1.5554237365722656, |
|
"logits/rejected": -1.5433388948440552, |
|
"logps/chosen": -461.245849609375, |
|
"logps/rejected": -686.7439575195312, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.842258095741272, |
|
"rewards/margins": 2.4114012718200684, |
|
"rewards/rejected": -4.253658771514893, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 13.0139085843487, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": -1.51650071144104, |
|
"logits/rejected": -1.4218528270721436, |
|
"logps/chosen": -408.762939453125, |
|
"logps/rejected": -523.1329956054688, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.722747802734375, |
|
"rewards/margins": 1.6003834009170532, |
|
"rewards/rejected": -3.3231310844421387, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 10.279174762722011, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": -1.4751393795013428, |
|
"logits/rejected": -1.4030897617340088, |
|
"logps/chosen": -480.7040100097656, |
|
"logps/rejected": -683.616943359375, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9335578680038452, |
|
"rewards/margins": 2.1118619441986084, |
|
"rewards/rejected": -4.045419692993164, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 10.58914230166321, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": -1.4881677627563477, |
|
"logits/rejected": -1.339155673980713, |
|
"logps/chosen": -489.5857849121094, |
|
"logps/rejected": -993.9429931640625, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1533727645874023, |
|
"rewards/margins": 4.9657697677612305, |
|
"rewards/rejected": -7.119143009185791, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 10.044783509073032, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": -1.5943437814712524, |
|
"logits/rejected": -1.4688080549240112, |
|
"logps/chosen": -434.68804931640625, |
|
"logps/rejected": -752.3925170898438, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8741295337677002, |
|
"rewards/margins": 3.1029107570648193, |
|
"rewards/rejected": -4.977039813995361, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 10.803603559596482, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": -1.63632071018219, |
|
"logits/rejected": -1.5772790908813477, |
|
"logps/chosen": -425.29705810546875, |
|
"logps/rejected": -547.4097900390625, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6209042072296143, |
|
"rewards/margins": 1.3503049612045288, |
|
"rewards/rejected": -2.9712090492248535, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 13.398493396695924, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": -1.5346853733062744, |
|
"logits/rejected": -1.4362837076187134, |
|
"logps/chosen": -431.0760192871094, |
|
"logps/rejected": -812.218017578125, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9112809896469116, |
|
"rewards/margins": 3.1443681716918945, |
|
"rewards/rejected": -5.055649757385254, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 8.974066504769196, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": -1.6221952438354492, |
|
"logits/rejected": -1.5492918491363525, |
|
"logps/chosen": -503.0997009277344, |
|
"logps/rejected": -713.653564453125, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9071382284164429, |
|
"rewards/margins": 2.196667432785034, |
|
"rewards/rejected": -4.1038055419921875, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 10.465480641447618, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": -1.463478922843933, |
|
"logits/rejected": -1.373296856880188, |
|
"logps/chosen": -358.58990478515625, |
|
"logps/rejected": -702.1085205078125, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6315553188323975, |
|
"rewards/margins": 3.0978779792785645, |
|
"rewards/rejected": -4.729433536529541, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 16.447249304360692, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": -1.601949691772461, |
|
"logits/rejected": -1.5340659618377686, |
|
"logps/chosen": -446.01934814453125, |
|
"logps/rejected": -602.1727294921875, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0065500736236572, |
|
"rewards/margins": 1.6265771389007568, |
|
"rewards/rejected": -3.633127212524414, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 9.975684368561806, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": -1.496711015701294, |
|
"logits/rejected": -1.3451837301254272, |
|
"logps/chosen": -453.6783752441406, |
|
"logps/rejected": -909.4461059570312, |
|
"loss": 0.4633, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1523375511169434, |
|
"rewards/margins": 4.429135799407959, |
|
"rewards/rejected": -6.581473350524902, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 9.551241278071267, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": -1.5323810577392578, |
|
"logits/rejected": -1.4816257953643799, |
|
"logps/chosen": -476.60723876953125, |
|
"logps/rejected": -552.1718139648438, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8675925731658936, |
|
"rewards/margins": 1.1629068851470947, |
|
"rewards/rejected": -3.0304996967315674, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 11.460448418061953, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": -1.5307328701019287, |
|
"logits/rejected": -1.4167407751083374, |
|
"logps/chosen": -492.13885498046875, |
|
"logps/rejected": -644.9505004882812, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7892935276031494, |
|
"rewards/margins": 1.8879598379135132, |
|
"rewards/rejected": -3.6772537231445312, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 18.293188421454047, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": -1.6222972869873047, |
|
"logits/rejected": -1.5244884490966797, |
|
"logps/chosen": -476.624267578125, |
|
"logps/rejected": -844.6385498046875, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1062722206115723, |
|
"rewards/margins": 3.3689582347869873, |
|
"rewards/rejected": -5.4752302169799805, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 9.733158102478546, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": -1.5991920232772827, |
|
"logits/rejected": -1.5383590459823608, |
|
"logps/chosen": -447.33721923828125, |
|
"logps/rejected": -570.9419555664062, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.72260320186615, |
|
"rewards/margins": 1.8290891647338867, |
|
"rewards/rejected": -3.551692247390747, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 13.946180043271863, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": -1.5528197288513184, |
|
"logits/rejected": -1.3879868984222412, |
|
"logps/chosen": -410.92718505859375, |
|
"logps/rejected": -823.4285888671875, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.739851713180542, |
|
"rewards/margins": 3.7594234943389893, |
|
"rewards/rejected": -5.499274730682373, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 10.290377075371278, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": -1.6512863636016846, |
|
"logits/rejected": -1.6360222101211548, |
|
"logps/chosen": -465.19073486328125, |
|
"logps/rejected": -523.7138061523438, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8953874111175537, |
|
"rewards/margins": 0.9017803072929382, |
|
"rewards/rejected": -2.797168254852295, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 8.491674840989798, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": -1.6026594638824463, |
|
"logits/rejected": -1.5701754093170166, |
|
"logps/chosen": -467.7550354003906, |
|
"logps/rejected": -753.0574951171875, |
|
"loss": 0.4577, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8194019794464111, |
|
"rewards/margins": 2.601040840148926, |
|
"rewards/rejected": -4.420442581176758, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 31.306228191814924, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": -1.5099729299545288, |
|
"logits/rejected": -1.4581714868545532, |
|
"logps/chosen": -468.90753173828125, |
|
"logps/rejected": -623.4779663085938, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9226620197296143, |
|
"rewards/margins": 1.5821655988693237, |
|
"rewards/rejected": -3.5048279762268066, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 11.64208369065543, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": -1.6213748455047607, |
|
"logits/rejected": -1.5881431102752686, |
|
"logps/chosen": -452.04132080078125, |
|
"logps/rejected": -652.6304931640625, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6790266036987305, |
|
"rewards/margins": 1.9346497058868408, |
|
"rewards/rejected": -3.6136765480041504, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 13.434697867289191, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": -1.717829704284668, |
|
"logits/rejected": -1.6711931228637695, |
|
"logps/chosen": -415.7367248535156, |
|
"logps/rejected": -573.2308959960938, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4689064025878906, |
|
"rewards/margins": 1.6664180755615234, |
|
"rewards/rejected": -3.135324239730835, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 9.195466035459773, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": -1.4839345216751099, |
|
"logits/rejected": -1.4070708751678467, |
|
"logps/chosen": -408.98046875, |
|
"logps/rejected": -607.1571044921875, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.960526704788208, |
|
"rewards/margins": 1.7776432037353516, |
|
"rewards/rejected": -3.7381699085235596, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 8.168397030598346, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": -1.5618484020233154, |
|
"logits/rejected": -1.5674700736999512, |
|
"logps/chosen": -410.63739013671875, |
|
"logps/rejected": -545.4283447265625, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5986849069595337, |
|
"rewards/margins": 1.2204779386520386, |
|
"rewards/rejected": -2.8191628456115723, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 15.795620020388752, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": -1.574741244316101, |
|
"logits/rejected": -1.5150604248046875, |
|
"logps/chosen": -502.17840576171875, |
|
"logps/rejected": -597.1779174804688, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.739625334739685, |
|
"rewards/margins": 1.5817149877548218, |
|
"rewards/rejected": -3.3213400840759277, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 12.564082860552478, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": -1.604776382446289, |
|
"logits/rejected": -1.5733304023742676, |
|
"logps/chosen": -460.91168212890625, |
|
"logps/rejected": -548.9085083007812, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.717616081237793, |
|
"rewards/margins": 0.9698230624198914, |
|
"rewards/rejected": -2.68743896484375, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 12.201358595051783, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": -1.6035501956939697, |
|
"logits/rejected": -1.4797896146774292, |
|
"logps/chosen": -501.05548095703125, |
|
"logps/rejected": -667.8190307617188, |
|
"loss": 0.4558, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.150601863861084, |
|
"rewards/margins": 2.3036932945251465, |
|
"rewards/rejected": -4.4542951583862305, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 10.795967175298328, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": -1.5843151807785034, |
|
"logits/rejected": -1.5326802730560303, |
|
"logps/chosen": -501.5411682128906, |
|
"logps/rejected": -611.32568359375, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1024060249328613, |
|
"rewards/margins": 1.6799418926239014, |
|
"rewards/rejected": -3.7823474407196045, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 28.011002170203856, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": -1.5918710231781006, |
|
"logits/rejected": -1.4779281616210938, |
|
"logps/chosen": -473.7042541503906, |
|
"logps/rejected": -662.2276000976562, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0022711753845215, |
|
"rewards/margins": 2.1550307273864746, |
|
"rewards/rejected": -4.157301902770996, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 12.528624583800916, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": -1.6873347759246826, |
|
"logits/rejected": -1.596543312072754, |
|
"logps/chosen": -460.88909912109375, |
|
"logps/rejected": -586.3327026367188, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7791045904159546, |
|
"rewards/margins": 1.3285664319992065, |
|
"rewards/rejected": -3.1076712608337402, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 11.065013602236313, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": -1.7246005535125732, |
|
"logits/rejected": -1.7012087106704712, |
|
"logps/chosen": -493.4828186035156, |
|
"logps/rejected": -576.1470336914062, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8482357263565063, |
|
"rewards/margins": 0.951060950756073, |
|
"rewards/rejected": -2.7992968559265137, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 11.565962251192635, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": -1.5370794534683228, |
|
"logits/rejected": -1.459987759590149, |
|
"logps/chosen": -463.569580078125, |
|
"logps/rejected": -711.2032470703125, |
|
"loss": 0.5085, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.087894916534424, |
|
"rewards/margins": 2.450357675552368, |
|
"rewards/rejected": -4.538252353668213, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 9.118437836656124, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": -1.5175681114196777, |
|
"logits/rejected": -1.5240638256072998, |
|
"logps/chosen": -382.7988586425781, |
|
"logps/rejected": -606.9676513671875, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6014810800552368, |
|
"rewards/margins": 1.8436975479125977, |
|
"rewards/rejected": -3.445178508758545, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 8.445711443707477, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": -1.6221126317977905, |
|
"logits/rejected": -1.5964024066925049, |
|
"logps/chosen": -470.7940368652344, |
|
"logps/rejected": -608.9495239257812, |
|
"loss": 0.5176, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7363065481185913, |
|
"rewards/margins": 1.7359542846679688, |
|
"rewards/rejected": -3.4722609519958496, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 8.500425735666235, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": -1.5512058734893799, |
|
"logits/rejected": -1.5746601819992065, |
|
"logps/chosen": -498.8207092285156, |
|
"logps/rejected": -657.9988403320312, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9174222946166992, |
|
"rewards/margins": 1.5131709575653076, |
|
"rewards/rejected": -3.430593490600586, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 12.852665923928024, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": -1.6059240102767944, |
|
"logits/rejected": -1.5994579792022705, |
|
"logps/chosen": -344.4139404296875, |
|
"logps/rejected": -512.6463623046875, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4501264095306396, |
|
"rewards/margins": 1.4970743656158447, |
|
"rewards/rejected": -2.9472010135650635, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 17.194251207513553, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": -1.643771767616272, |
|
"logits/rejected": -1.5831727981567383, |
|
"logps/chosen": -502.8651428222656, |
|
"logps/rejected": -554.9192504882812, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0862865447998047, |
|
"rewards/margins": 0.8170648813247681, |
|
"rewards/rejected": -2.903351306915283, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 12.906100640591236, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": -1.7391399145126343, |
|
"logits/rejected": -1.6716740131378174, |
|
"logps/chosen": -431.04180908203125, |
|
"logps/rejected": -561.5888671875, |
|
"loss": 0.5147, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6912224292755127, |
|
"rewards/margins": 1.2724682092666626, |
|
"rewards/rejected": -2.9636902809143066, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 9.565619443461362, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": -1.5313981771469116, |
|
"logits/rejected": -1.5281095504760742, |
|
"logps/chosen": -398.6699523925781, |
|
"logps/rejected": -540.1404418945312, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8083369731903076, |
|
"rewards/margins": 1.3905736207962036, |
|
"rewards/rejected": -3.198910713195801, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 15.033651183019835, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": -1.469612717628479, |
|
"logits/rejected": -1.414353609085083, |
|
"logps/chosen": -363.5370178222656, |
|
"logps/rejected": -500.765380859375, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.468360185623169, |
|
"rewards/margins": 1.3389683961868286, |
|
"rewards/rejected": -2.807328701019287, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 9.435304971614766, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": -1.461320161819458, |
|
"logits/rejected": -1.4299747943878174, |
|
"logps/chosen": -376.7994079589844, |
|
"logps/rejected": -629.6224975585938, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.896950364112854, |
|
"rewards/margins": 2.1872236728668213, |
|
"rewards/rejected": -4.084174156188965, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 19.51031494795868, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": -1.6188557147979736, |
|
"logits/rejected": -1.5686419010162354, |
|
"logps/chosen": -430.37677001953125, |
|
"logps/rejected": -559.7628173828125, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6697213649749756, |
|
"rewards/margins": 1.4008890390396118, |
|
"rewards/rejected": -3.070610523223877, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 9.121829865634552, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": -1.6373344659805298, |
|
"logits/rejected": -1.5528385639190674, |
|
"logps/chosen": -470.95147705078125, |
|
"logps/rejected": -721.1168212890625, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9253082275390625, |
|
"rewards/margins": 2.59212327003479, |
|
"rewards/rejected": -4.517431259155273, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 10.746753751922695, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": -1.6567723751068115, |
|
"logits/rejected": -1.6191829442977905, |
|
"logps/chosen": -511.308349609375, |
|
"logps/rejected": -676.2968139648438, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9652636051177979, |
|
"rewards/margins": 1.6685765981674194, |
|
"rewards/rejected": -3.6338400840759277, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 9.111246533138369, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": -1.605023741722107, |
|
"logits/rejected": -1.6224443912506104, |
|
"logps/chosen": -493.97021484375, |
|
"logps/rejected": -663.3172607421875, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9065659046173096, |
|
"rewards/margins": 1.4981725215911865, |
|
"rewards/rejected": -3.404738664627075, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 19.195509902532088, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": -1.5975620746612549, |
|
"logits/rejected": -1.5121543407440186, |
|
"logps/chosen": -490.51123046875, |
|
"logps/rejected": -638.4617309570312, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8694756031036377, |
|
"rewards/margins": 1.672284722328186, |
|
"rewards/rejected": -3.541760206222534, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 7.617751594360228, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": -1.6257362365722656, |
|
"logits/rejected": -1.556630253791809, |
|
"logps/chosen": -514.2935180664062, |
|
"logps/rejected": -719.3594360351562, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7584432363510132, |
|
"rewards/margins": 2.219550371170044, |
|
"rewards/rejected": -3.9779934883117676, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 13.715441326355377, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": -1.5823287963867188, |
|
"logits/rejected": -1.4760550260543823, |
|
"logps/chosen": -454.1546936035156, |
|
"logps/rejected": -674.0308837890625, |
|
"loss": 0.4824, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.85116446018219, |
|
"rewards/margins": 2.426201581954956, |
|
"rewards/rejected": -4.277366638183594, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 11.870152015817894, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": -1.6472456455230713, |
|
"logits/rejected": -1.5713512897491455, |
|
"logps/chosen": -454.6564025878906, |
|
"logps/rejected": -658.142578125, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7928310632705688, |
|
"rewards/margins": 1.8045374155044556, |
|
"rewards/rejected": -3.5973682403564453, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 12.133762665442388, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": -1.5800144672393799, |
|
"logits/rejected": -1.4943416118621826, |
|
"logps/chosen": -462.8450622558594, |
|
"logps/rejected": -622.0472412109375, |
|
"loss": 0.4795, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.633138656616211, |
|
"rewards/margins": 1.8899364471435547, |
|
"rewards/rejected": -3.5230751037597656, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 15.184681556970155, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": -1.6487934589385986, |
|
"logits/rejected": -1.5611571073532104, |
|
"logps/chosen": -443.3125, |
|
"logps/rejected": -629.9989624023438, |
|
"loss": 0.4486, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7226934432983398, |
|
"rewards/margins": 2.09248948097229, |
|
"rewards/rejected": -3.81518292427063, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 8.881606654301349, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": -1.50355064868927, |
|
"logits/rejected": -1.4316844940185547, |
|
"logps/chosen": -454.41375732421875, |
|
"logps/rejected": -705.1302490234375, |
|
"loss": 0.4685, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0102181434631348, |
|
"rewards/margins": 2.672572612762451, |
|
"rewards/rejected": -4.682791233062744, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 12.06878430856091, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": -1.5602006912231445, |
|
"logits/rejected": -1.397483229637146, |
|
"logps/chosen": -426.56011962890625, |
|
"logps/rejected": -804.3447265625, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9955031871795654, |
|
"rewards/margins": 3.9034297466278076, |
|
"rewards/rejected": -5.898933410644531, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 12.298414208256554, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": -1.5111182928085327, |
|
"logits/rejected": -1.4830925464630127, |
|
"logps/chosen": -453.55706787109375, |
|
"logps/rejected": -682.9276123046875, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8569743633270264, |
|
"rewards/margins": 2.276150703430176, |
|
"rewards/rejected": -4.133125305175781, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 11.302209741727303, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": -1.479041337966919, |
|
"logits/rejected": -1.4589459896087646, |
|
"logps/chosen": -488.8414001464844, |
|
"logps/rejected": -664.2630615234375, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.093553066253662, |
|
"rewards/margins": 1.9455658197402954, |
|
"rewards/rejected": -4.039118766784668, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 9.425382101973028, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": -1.6392253637313843, |
|
"logits/rejected": -1.6131207942962646, |
|
"logps/chosen": -453.7015686035156, |
|
"logps/rejected": -568.0528564453125, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7246614694595337, |
|
"rewards/margins": 1.5568969249725342, |
|
"rewards/rejected": -3.2815582752227783, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 10.214651228125971, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": -1.5981972217559814, |
|
"logits/rejected": -1.5456653833389282, |
|
"logps/chosen": -404.54217529296875, |
|
"logps/rejected": -615.52490234375, |
|
"loss": 0.4716, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8212268352508545, |
|
"rewards/margins": 1.7736434936523438, |
|
"rewards/rejected": -3.5948708057403564, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 9.96347940855632, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": -1.7033554315567017, |
|
"logits/rejected": -1.689077615737915, |
|
"logps/chosen": -479.61602783203125, |
|
"logps/rejected": -600.86083984375, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7726186513900757, |
|
"rewards/margins": 1.4355189800262451, |
|
"rewards/rejected": -3.2081375122070312, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 9.182568453922713, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": -1.7283750772476196, |
|
"logits/rejected": -1.651894211769104, |
|
"logps/chosen": -472.06890869140625, |
|
"logps/rejected": -567.2432250976562, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7672761678695679, |
|
"rewards/margins": 1.1923803091049194, |
|
"rewards/rejected": -2.9596564769744873, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 13.775785773649671, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": -1.5656628608703613, |
|
"logits/rejected": -1.5381602048873901, |
|
"logps/chosen": -405.3626403808594, |
|
"logps/rejected": -634.8448486328125, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9110819101333618, |
|
"rewards/margins": 1.8859049081802368, |
|
"rewards/rejected": -3.7969868183135986, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 18.73925450329653, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": -1.5921382904052734, |
|
"logits/rejected": -1.3741363286972046, |
|
"logps/chosen": -594.7786865234375, |
|
"logps/rejected": -781.3391723632812, |
|
"loss": 0.5287, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.647385597229004, |
|
"rewards/margins": 2.289605140686035, |
|
"rewards/rejected": -4.936990737915039, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 24.661577248495657, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": -1.5586223602294922, |
|
"logits/rejected": -1.4272655248641968, |
|
"logps/chosen": -389.38580322265625, |
|
"logps/rejected": -706.0331420898438, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.627242088317871, |
|
"rewards/margins": 3.178426742553711, |
|
"rewards/rejected": -4.805668830871582, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 13.393978248198778, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": -1.5590431690216064, |
|
"logits/rejected": -1.4452258348464966, |
|
"logps/chosen": -470.12890625, |
|
"logps/rejected": -711.6036376953125, |
|
"loss": 0.5168, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2957184314727783, |
|
"rewards/margins": 2.5038769245147705, |
|
"rewards/rejected": -4.799595355987549, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 15.326596868212043, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": -1.6833692789077759, |
|
"logits/rejected": -1.5196136236190796, |
|
"logps/chosen": -434.9501953125, |
|
"logps/rejected": -797.6722412109375, |
|
"loss": 0.4405, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8721044063568115, |
|
"rewards/margins": 3.692800998687744, |
|
"rewards/rejected": -5.564905166625977, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 15.888863470601404, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": -1.576700210571289, |
|
"logits/rejected": -1.5513032674789429, |
|
"logps/chosen": -457.85125732421875, |
|
"logps/rejected": -624.4636840820312, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7959423065185547, |
|
"rewards/margins": 1.5467565059661865, |
|
"rewards/rejected": -3.3426990509033203, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 80.57455805524923, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": -1.5804119110107422, |
|
"logits/rejected": -1.4564083814620972, |
|
"logps/chosen": -529.6447143554688, |
|
"logps/rejected": -735.8713989257812, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.244884967803955, |
|
"rewards/margins": 2.5058138370513916, |
|
"rewards/rejected": -4.750698566436768, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 11.057339252820107, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": -1.6222680807113647, |
|
"logits/rejected": -1.5655839443206787, |
|
"logps/chosen": -490.6512756347656, |
|
"logps/rejected": -579.689208984375, |
|
"loss": 0.494, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9746372699737549, |
|
"rewards/margins": 1.4921201467514038, |
|
"rewards/rejected": -3.466757297515869, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 10.618176488071374, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": -1.5830059051513672, |
|
"logits/rejected": -1.5720051527023315, |
|
"logps/chosen": -406.87188720703125, |
|
"logps/rejected": -586.8067016601562, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.686078429222107, |
|
"rewards/margins": 1.659711241722107, |
|
"rewards/rejected": -3.345789670944214, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 13.680452637120762, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": -1.6163785457611084, |
|
"logits/rejected": -1.5762126445770264, |
|
"logps/chosen": -446.047607421875, |
|
"logps/rejected": -623.6538696289062, |
|
"loss": 0.5153, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9947631359100342, |
|
"rewards/margins": 1.5673038959503174, |
|
"rewards/rejected": -3.5620665550231934, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 14.433630119675083, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": -1.7464625835418701, |
|
"logits/rejected": -1.6586641073226929, |
|
"logps/chosen": -430.14776611328125, |
|
"logps/rejected": -545.7342529296875, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7657415866851807, |
|
"rewards/margins": 1.3537575006484985, |
|
"rewards/rejected": -3.1194987297058105, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 12.66091896209343, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": -1.5930930376052856, |
|
"logits/rejected": -1.4509809017181396, |
|
"logps/chosen": -376.0271301269531, |
|
"logps/rejected": -682.3421630859375, |
|
"loss": 0.5118, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4719572067260742, |
|
"rewards/margins": 3.3040995597839355, |
|
"rewards/rejected": -4.776057243347168, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 10.980304828194955, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": -1.6833438873291016, |
|
"logits/rejected": -1.622230887413025, |
|
"logps/chosen": -491.75860595703125, |
|
"logps/rejected": -625.6595458984375, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9277355670928955, |
|
"rewards/margins": 1.5761892795562744, |
|
"rewards/rejected": -3.503924608230591, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 10.749977033774096, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": -1.5899261236190796, |
|
"logits/rejected": -1.5829485654830933, |
|
"logps/chosen": -477.94805908203125, |
|
"logps/rejected": -673.7672729492188, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0385262966156006, |
|
"rewards/margins": 1.790924072265625, |
|
"rewards/rejected": -3.8294501304626465, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 8.484431263168627, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": -1.6738132238388062, |
|
"logits/rejected": -1.6105749607086182, |
|
"logps/chosen": -458.83331298828125, |
|
"logps/rejected": -622.7803955078125, |
|
"loss": 0.4562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.730473518371582, |
|
"rewards/margins": 1.7545102834701538, |
|
"rewards/rejected": -3.4849839210510254, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 9.387892269267574, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": -1.5848913192749023, |
|
"logits/rejected": -1.4476993083953857, |
|
"logps/chosen": -471.938720703125, |
|
"logps/rejected": -831.1727294921875, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8803211450576782, |
|
"rewards/margins": 3.8900272846221924, |
|
"rewards/rejected": -5.77034854888916, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 18.641825226968827, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": -1.6654167175292969, |
|
"logits/rejected": -1.5044206380844116, |
|
"logps/chosen": -483.08917236328125, |
|
"logps/rejected": -693.2271728515625, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.000235080718994, |
|
"rewards/margins": 2.46742582321167, |
|
"rewards/rejected": -4.467661380767822, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 12.349301851226299, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": -1.5291095972061157, |
|
"logits/rejected": -1.419311285018921, |
|
"logps/chosen": -436.09454345703125, |
|
"logps/rejected": -668.6248779296875, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8284947872161865, |
|
"rewards/margins": 2.354825496673584, |
|
"rewards/rejected": -4.18332052230835, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 9.060603124592086, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": -1.6744062900543213, |
|
"logits/rejected": -1.6133877038955688, |
|
"logps/chosen": -455.89495849609375, |
|
"logps/rejected": -635.6976318359375, |
|
"loss": 0.4573, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.951505422592163, |
|
"rewards/margins": 1.7754148244857788, |
|
"rewards/rejected": -3.7269206047058105, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 12.585511956386624, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": -1.598135232925415, |
|
"logits/rejected": -1.6028814315795898, |
|
"logps/chosen": -483.92535400390625, |
|
"logps/rejected": -569.0828247070312, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1339850425720215, |
|
"rewards/margins": 1.0029327869415283, |
|
"rewards/rejected": -3.136918067932129, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 13.389579263658327, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": -1.640591025352478, |
|
"logits/rejected": -1.5519497394561768, |
|
"logps/chosen": -507.2039489746094, |
|
"logps/rejected": -642.580322265625, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0453391075134277, |
|
"rewards/margins": 2.058948040008545, |
|
"rewards/rejected": -4.104287147521973, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 13.291226613775438, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": -1.569854736328125, |
|
"logits/rejected": -1.5137242078781128, |
|
"logps/chosen": -525.5565185546875, |
|
"logps/rejected": -748.2379150390625, |
|
"loss": 0.443, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.161888360977173, |
|
"rewards/margins": 2.5372633934020996, |
|
"rewards/rejected": -4.699151515960693, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 11.742394321174341, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": -1.680760383605957, |
|
"logits/rejected": -1.5625580549240112, |
|
"logps/chosen": -540.314453125, |
|
"logps/rejected": -675.5418090820312, |
|
"loss": 0.4639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9422404766082764, |
|
"rewards/margins": 2.1821341514587402, |
|
"rewards/rejected": -4.1243743896484375, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 10.388328753668427, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": -1.631049394607544, |
|
"logits/rejected": -1.5399057865142822, |
|
"logps/chosen": -395.74127197265625, |
|
"logps/rejected": -528.3821411132812, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6879892349243164, |
|
"rewards/margins": 1.5684130191802979, |
|
"rewards/rejected": -3.2564022541046143, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 8.566813415550278, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": -1.5318087339401245, |
|
"logits/rejected": -1.3939533233642578, |
|
"logps/chosen": -455.18475341796875, |
|
"logps/rejected": -784.6776123046875, |
|
"loss": 0.4742, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1146061420440674, |
|
"rewards/margins": 3.002748966217041, |
|
"rewards/rejected": -5.1173553466796875, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 24.581271633624063, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": -1.572483777999878, |
|
"logits/rejected": -1.5058854818344116, |
|
"logps/chosen": -462.61798095703125, |
|
"logps/rejected": -692.5750732421875, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.017944812774658, |
|
"rewards/margins": 2.093128204345703, |
|
"rewards/rejected": -4.111073017120361, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 11.15477109986071, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": -1.6517555713653564, |
|
"logits/rejected": -1.6141020059585571, |
|
"logps/chosen": -524.3145751953125, |
|
"logps/rejected": -706.8885498046875, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.254424571990967, |
|
"rewards/margins": 1.5656322240829468, |
|
"rewards/rejected": -3.820056438446045, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 14.053000697829326, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": -1.565073013305664, |
|
"logits/rejected": -1.3497178554534912, |
|
"logps/chosen": -454.67254638671875, |
|
"logps/rejected": -954.7060546875, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2211482524871826, |
|
"rewards/margins": 5.220716953277588, |
|
"rewards/rejected": -7.441864967346191, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 14.873913908271929, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": -1.5818357467651367, |
|
"logits/rejected": -1.6091349124908447, |
|
"logps/chosen": -396.847412109375, |
|
"logps/rejected": -471.558837890625, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7213027477264404, |
|
"rewards/margins": 0.7357383966445923, |
|
"rewards/rejected": -2.457041025161743, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 7.662127553049077, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": -1.7527958154678345, |
|
"logits/rejected": -1.6081327199935913, |
|
"logps/chosen": -597.8883666992188, |
|
"logps/rejected": -634.3714599609375, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.129807472229004, |
|
"rewards/margins": 1.1348294019699097, |
|
"rewards/rejected": -3.2646374702453613, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 9.295843025722531, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": -1.6534563302993774, |
|
"logits/rejected": -1.5655500888824463, |
|
"logps/chosen": -467.03948974609375, |
|
"logps/rejected": -591.6249389648438, |
|
"loss": 0.4943, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7798774242401123, |
|
"rewards/margins": 1.6510403156280518, |
|
"rewards/rejected": -3.4309182167053223, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 11.329859485907628, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": -1.6167590618133545, |
|
"logits/rejected": -1.5307334661483765, |
|
"logps/chosen": -470.58270263671875, |
|
"logps/rejected": -668.56640625, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9665504693984985, |
|
"rewards/margins": 2.131230592727661, |
|
"rewards/rejected": -4.097781181335449, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 20.11235991117846, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": -1.670371413230896, |
|
"logits/rejected": -1.4920861721038818, |
|
"logps/chosen": -548.06787109375, |
|
"logps/rejected": -802.3192138671875, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2579779624938965, |
|
"rewards/margins": 3.0247139930725098, |
|
"rewards/rejected": -5.282691955566406, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 11.445798775826706, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": -1.6630195379257202, |
|
"logits/rejected": -1.5998250246047974, |
|
"logps/chosen": -498.11968994140625, |
|
"logps/rejected": -625.1238403320312, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1429293155670166, |
|
"rewards/margins": 1.2725276947021484, |
|
"rewards/rejected": -3.415456771850586, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 11.346669302814137, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": -1.723249077796936, |
|
"logits/rejected": -1.665723443031311, |
|
"logps/chosen": -476.0176696777344, |
|
"logps/rejected": -490.48779296875, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8500604629516602, |
|
"rewards/margins": 0.6172209978103638, |
|
"rewards/rejected": -2.4672813415527344, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 15.969215547104008, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": -1.588076114654541, |
|
"logits/rejected": -1.5232843160629272, |
|
"logps/chosen": -461.88848876953125, |
|
"logps/rejected": -622.911865234375, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8116493225097656, |
|
"rewards/margins": 1.6616909503936768, |
|
"rewards/rejected": -3.4733402729034424, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 11.072153331210435, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": -1.5928449630737305, |
|
"logits/rejected": -1.5169405937194824, |
|
"logps/chosen": -427.0491638183594, |
|
"logps/rejected": -672.35107421875, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7070322036743164, |
|
"rewards/margins": 2.5327906608581543, |
|
"rewards/rejected": -4.239823341369629, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 9.49570882884042, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": -1.6075379848480225, |
|
"logits/rejected": -1.5888822078704834, |
|
"logps/chosen": -491.94390869140625, |
|
"logps/rejected": -652.8501586914062, |
|
"loss": 0.5274, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3509058952331543, |
|
"rewards/margins": 1.588134765625, |
|
"rewards/rejected": -3.939040422439575, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 11.923530456024624, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": -1.6823714971542358, |
|
"logits/rejected": -1.593367338180542, |
|
"logps/chosen": -547.1072998046875, |
|
"logps/rejected": -672.8248901367188, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.304713249206543, |
|
"rewards/margins": 1.8532909154891968, |
|
"rewards/rejected": -4.158003807067871, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 10.522005050954938, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": -1.6868102550506592, |
|
"logits/rejected": -1.6846917867660522, |
|
"logps/chosen": -485.7550354003906, |
|
"logps/rejected": -589.5763549804688, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7846243381500244, |
|
"rewards/margins": 1.1891510486602783, |
|
"rewards/rejected": -2.9737753868103027, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 9.525223827769382, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": -1.6697797775268555, |
|
"logits/rejected": -1.6646827459335327, |
|
"logps/chosen": -471.4710998535156, |
|
"logps/rejected": -512.9531860351562, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.646978735923767, |
|
"rewards/margins": 0.6669288873672485, |
|
"rewards/rejected": -2.3139073848724365, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 12.160402315077537, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": -1.6833436489105225, |
|
"logits/rejected": -1.6594688892364502, |
|
"logps/chosen": -362.41400146484375, |
|
"logps/rejected": -500.51666259765625, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2686312198638916, |
|
"rewards/margins": 1.350454568862915, |
|
"rewards/rejected": -2.6190860271453857, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 8.805306061647622, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": -1.5704456567764282, |
|
"logits/rejected": -1.5578100681304932, |
|
"logps/chosen": -376.8490905761719, |
|
"logps/rejected": -579.5418701171875, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.635333776473999, |
|
"rewards/margins": 1.6225509643554688, |
|
"rewards/rejected": -3.257884979248047, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 12.572003529621947, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": -1.6139347553253174, |
|
"logits/rejected": -1.5461299419403076, |
|
"logps/chosen": -473.8414611816406, |
|
"logps/rejected": -705.4661865234375, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.090013027191162, |
|
"rewards/margins": 2.4070258140563965, |
|
"rewards/rejected": -4.4970383644104, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 14.913426265894294, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": -1.657772421836853, |
|
"logits/rejected": -1.6069705486297607, |
|
"logps/chosen": -506.9501037597656, |
|
"logps/rejected": -699.5079345703125, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0737128257751465, |
|
"rewards/margins": 2.0126349925994873, |
|
"rewards/rejected": -4.086348056793213, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 13.051269160073966, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": -1.714739203453064, |
|
"logits/rejected": -1.664184808731079, |
|
"logps/chosen": -409.457763671875, |
|
"logps/rejected": -502.5277404785156, |
|
"loss": 0.484, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7403171062469482, |
|
"rewards/margins": 0.9889610409736633, |
|
"rewards/rejected": -2.729278087615967, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 9.061162365072834, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": -1.6095638275146484, |
|
"logits/rejected": -1.5605108737945557, |
|
"logps/chosen": -442.99029541015625, |
|
"logps/rejected": -652.7391357421875, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9627918004989624, |
|
"rewards/margins": 2.100388288497925, |
|
"rewards/rejected": -4.063180446624756, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 13.034931697815875, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": -1.6348292827606201, |
|
"logits/rejected": -1.5857656002044678, |
|
"logps/chosen": -506.25128173828125, |
|
"logps/rejected": -560.7322387695312, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7951558828353882, |
|
"rewards/margins": 1.0923255681991577, |
|
"rewards/rejected": -2.887481212615967, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 16.47510836794499, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": -1.652269959449768, |
|
"logits/rejected": -1.6364988088607788, |
|
"logps/chosen": -473.0938415527344, |
|
"logps/rejected": -605.9546508789062, |
|
"loss": 0.5383, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1653454303741455, |
|
"rewards/margins": 0.9054538011550903, |
|
"rewards/rejected": -3.070798873901367, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 16.558002870622833, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": -1.5733797550201416, |
|
"logits/rejected": -1.5531814098358154, |
|
"logps/chosen": -409.64007568359375, |
|
"logps/rejected": -545.078857421875, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9042761325836182, |
|
"rewards/margins": 1.3297529220581055, |
|
"rewards/rejected": -3.2340290546417236, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 17.718409075113666, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": -1.6162738800048828, |
|
"logits/rejected": -1.422507643699646, |
|
"logps/chosen": -433.74639892578125, |
|
"logps/rejected": -798.6219482421875, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6543105840682983, |
|
"rewards/margins": 3.9157538414001465, |
|
"rewards/rejected": -5.570064067840576, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 19.479387945921662, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": -1.5644371509552002, |
|
"logits/rejected": -1.5578614473342896, |
|
"logps/chosen": -391.2760314941406, |
|
"logps/rejected": -588.4986572265625, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6809015274047852, |
|
"rewards/margins": 1.7512578964233398, |
|
"rewards/rejected": -3.432159423828125, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 15.579421392343054, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": -1.626412034034729, |
|
"logits/rejected": -1.508430004119873, |
|
"logps/chosen": -362.73089599609375, |
|
"logps/rejected": -497.5302734375, |
|
"loss": 0.4601, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5209667682647705, |
|
"rewards/margins": 1.7109506130218506, |
|
"rewards/rejected": -3.231917142868042, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 12.975221010220178, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": -1.6329269409179688, |
|
"logits/rejected": -1.472723126411438, |
|
"logps/chosen": -473.1459045410156, |
|
"logps/rejected": -683.5482788085938, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.913275957107544, |
|
"rewards/margins": 2.6152966022491455, |
|
"rewards/rejected": -4.528572082519531, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 14.423256562483388, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": -1.6391162872314453, |
|
"logits/rejected": -1.671979546546936, |
|
"logps/chosen": -443.46258544921875, |
|
"logps/rejected": -672.6021118164062, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9398761987686157, |
|
"rewards/margins": 2.000032901763916, |
|
"rewards/rejected": -3.9399094581604004, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 9.425763927010763, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": -1.6494081020355225, |
|
"logits/rejected": -1.5929086208343506, |
|
"logps/chosen": -396.45013427734375, |
|
"logps/rejected": -661.6722412109375, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.6658204793930054, |
|
"rewards/margins": 2.458636999130249, |
|
"rewards/rejected": -4.124457359313965, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 21.417485675065244, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": -1.740401268005371, |
|
"logits/rejected": -1.6570911407470703, |
|
"logps/chosen": -494.31964111328125, |
|
"logps/rejected": -612.0262451171875, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1772966384887695, |
|
"rewards/margins": 1.6181838512420654, |
|
"rewards/rejected": -3.795480728149414, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 16.00309629191891, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": -1.5891798734664917, |
|
"logits/rejected": -1.5248258113861084, |
|
"logps/chosen": -467.4602966308594, |
|
"logps/rejected": -596.62060546875, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2219550609588623, |
|
"rewards/margins": 1.2901359796524048, |
|
"rewards/rejected": -3.5120906829833984, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 13.429038672487438, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": -1.6623175144195557, |
|
"logits/rejected": -1.6009843349456787, |
|
"logps/chosen": -486.54345703125, |
|
"logps/rejected": -719.8893432617188, |
|
"loss": 0.5037, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.023263931274414, |
|
"rewards/margins": 2.204184055328369, |
|
"rewards/rejected": -4.227447986602783, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 26.48164665804949, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": -1.6708219051361084, |
|
"logits/rejected": -1.5749927759170532, |
|
"logps/chosen": -400.6322937011719, |
|
"logps/rejected": -630.9937133789062, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7153675556182861, |
|
"rewards/margins": 2.3315227031707764, |
|
"rewards/rejected": -4.0468902587890625, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 11.411704364971511, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": -1.564396619796753, |
|
"logits/rejected": -1.4874539375305176, |
|
"logps/chosen": -481.55242919921875, |
|
"logps/rejected": -635.627685546875, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.952723503112793, |
|
"rewards/margins": 1.7845569849014282, |
|
"rewards/rejected": -3.7372806072235107, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 15.030315396145902, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": -1.5914599895477295, |
|
"logits/rejected": -1.4839370250701904, |
|
"logps/chosen": -488.05621337890625, |
|
"logps/rejected": -683.0465698242188, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0653412342071533, |
|
"rewards/margins": 2.3046443462371826, |
|
"rewards/rejected": -4.369986057281494, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 11.782758949527297, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": -1.6213829517364502, |
|
"logits/rejected": -1.4937806129455566, |
|
"logps/chosen": -420.52911376953125, |
|
"logps/rejected": -647.6318969726562, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7656971216201782, |
|
"rewards/margins": 2.258690357208252, |
|
"rewards/rejected": -4.024387836456299, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 19.85256790289952, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": -1.6818307638168335, |
|
"logits/rejected": -1.6265443563461304, |
|
"logps/chosen": -564.2450561523438, |
|
"logps/rejected": -681.9746704101562, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.256525754928589, |
|
"rewards/margins": 1.55268394947052, |
|
"rewards/rejected": -3.8092098236083984, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 12.586313589978424, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": -1.700171709060669, |
|
"logits/rejected": -1.6361474990844727, |
|
"logps/chosen": -475.1417541503906, |
|
"logps/rejected": -746.2467041015625, |
|
"loss": 0.4623, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0661838054656982, |
|
"rewards/margins": 2.443232297897339, |
|
"rewards/rejected": -4.509416103363037, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 12.787222537511584, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": -1.593766450881958, |
|
"logits/rejected": -1.5601489543914795, |
|
"logps/chosen": -449.85858154296875, |
|
"logps/rejected": -639.9388427734375, |
|
"loss": 0.445, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.833129644393921, |
|
"rewards/margins": 1.7945473194122314, |
|
"rewards/rejected": -3.6276767253875732, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 10.429284153734484, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": -1.5752068758010864, |
|
"logits/rejected": -1.422378659248352, |
|
"logps/chosen": -471.98876953125, |
|
"logps/rejected": -817.8197021484375, |
|
"loss": 0.442, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2775814533233643, |
|
"rewards/margins": 3.5992565155029297, |
|
"rewards/rejected": -5.876837730407715, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 12.741709586667612, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": -1.7176425457000732, |
|
"logits/rejected": -1.6414988040924072, |
|
"logps/chosen": -503.9371643066406, |
|
"logps/rejected": -640.4576416015625, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8939365148544312, |
|
"rewards/margins": 1.9764692783355713, |
|
"rewards/rejected": -3.870405912399292, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 11.675903630631732, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": -1.6118358373641968, |
|
"logits/rejected": -1.5552772283554077, |
|
"logps/chosen": -522.3504638671875, |
|
"logps/rejected": -701.8868408203125, |
|
"loss": 0.5099, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.871604323387146, |
|
"rewards/margins": 1.9561458826065063, |
|
"rewards/rejected": -3.8277504444122314, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 11.816100782837214, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": -1.6721569299697876, |
|
"logits/rejected": -1.6179077625274658, |
|
"logps/chosen": -539.4227905273438, |
|
"logps/rejected": -888.7571411132812, |
|
"loss": 0.4959, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.298069477081299, |
|
"rewards/margins": 3.5056235790252686, |
|
"rewards/rejected": -5.803693771362305, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 11.727624985469124, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": -1.6193329095840454, |
|
"logits/rejected": -1.5656651258468628, |
|
"logps/chosen": -435.56158447265625, |
|
"logps/rejected": -478.83770751953125, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5856086015701294, |
|
"rewards/margins": 0.8730261921882629, |
|
"rewards/rejected": -2.458634614944458, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 16.85961078114398, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": -1.6750271320343018, |
|
"logits/rejected": -1.6107738018035889, |
|
"logps/chosen": -503.64471435546875, |
|
"logps/rejected": -785.1126708984375, |
|
"loss": 0.4807, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2831358909606934, |
|
"rewards/margins": 2.812958240509033, |
|
"rewards/rejected": -5.096093654632568, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 12.461972445162298, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": -1.734662652015686, |
|
"logits/rejected": -1.739912748336792, |
|
"logps/chosen": -434.0250549316406, |
|
"logps/rejected": -562.7544555664062, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7552568912506104, |
|
"rewards/margins": 1.2200746536254883, |
|
"rewards/rejected": -2.9753317832946777, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 12.522523399418205, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": -1.5948150157928467, |
|
"logits/rejected": -1.5764580965042114, |
|
"logps/chosen": -513.3563232421875, |
|
"logps/rejected": -767.2200927734375, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.226046085357666, |
|
"rewards/margins": 2.2817482948303223, |
|
"rewards/rejected": -4.507794380187988, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 10.9488041002692, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": -1.6462711095809937, |
|
"logits/rejected": -1.6491082906723022, |
|
"logps/chosen": -459.8772888183594, |
|
"logps/rejected": -560.5117797851562, |
|
"loss": 0.4729, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.011331558227539, |
|
"rewards/margins": 0.9149211645126343, |
|
"rewards/rejected": -2.9262523651123047, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 9.649161518168588, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": -1.5380823612213135, |
|
"logits/rejected": -1.412889838218689, |
|
"logps/chosen": -441.87335205078125, |
|
"logps/rejected": -816.4763793945312, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.1320416927337646, |
|
"rewards/margins": 3.2392337322235107, |
|
"rewards/rejected": -5.371275424957275, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 11.267633686817604, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": -1.648723840713501, |
|
"logits/rejected": -1.6860382556915283, |
|
"logps/chosen": -460.56036376953125, |
|
"logps/rejected": -567.8131103515625, |
|
"loss": 0.4806, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.851995825767517, |
|
"rewards/margins": 0.5843394994735718, |
|
"rewards/rejected": -2.436335325241089, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 11.04068659513372, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": -1.7109369039535522, |
|
"logits/rejected": -1.5930227041244507, |
|
"logps/chosen": -468.35186767578125, |
|
"logps/rejected": -685.10986328125, |
|
"loss": 0.4614, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9170551300048828, |
|
"rewards/margins": 2.5844249725341797, |
|
"rewards/rejected": -4.5014801025390625, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 14.964168805394552, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": -1.6948446035385132, |
|
"logits/rejected": -1.5991394519805908, |
|
"logps/chosen": -517.13623046875, |
|
"logps/rejected": -857.3069458007812, |
|
"loss": 0.4527, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1508491039276123, |
|
"rewards/margins": 3.447594404220581, |
|
"rewards/rejected": -5.598443508148193, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 14.883238902792032, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": -1.6298729181289673, |
|
"logits/rejected": -1.5697710514068604, |
|
"logps/chosen": -407.47216796875, |
|
"logps/rejected": -590.4325561523438, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.779510259628296, |
|
"rewards/margins": 1.6561663150787354, |
|
"rewards/rejected": -3.4356765747070312, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 11.184941495697121, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": -1.5353864431381226, |
|
"logits/rejected": -1.5042493343353271, |
|
"logps/chosen": -483.96435546875, |
|
"logps/rejected": -689.84130859375, |
|
"loss": 0.4324, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.045846462249756, |
|
"rewards/margins": 2.3413102626800537, |
|
"rewards/rejected": -4.387156963348389, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 14.299734951667778, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": -1.6646445989608765, |
|
"logits/rejected": -1.649578332901001, |
|
"logps/chosen": -482.82843017578125, |
|
"logps/rejected": -551.9417724609375, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0101306438446045, |
|
"rewards/margins": 0.7671645879745483, |
|
"rewards/rejected": -2.777294874191284, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 10.602051511979361, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": -1.6949056386947632, |
|
"logits/rejected": -1.670143723487854, |
|
"logps/chosen": -504.10821533203125, |
|
"logps/rejected": -612.98876953125, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.004544734954834, |
|
"rewards/margins": 1.1068131923675537, |
|
"rewards/rejected": -3.111358165740967, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 10.528232336553208, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": -1.7100117206573486, |
|
"logits/rejected": -1.6325490474700928, |
|
"logps/chosen": -508.181640625, |
|
"logps/rejected": -681.7391967773438, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9281724691390991, |
|
"rewards/margins": 1.9070625305175781, |
|
"rewards/rejected": -3.835235595703125, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 11.26583083013792, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": -1.668421745300293, |
|
"logits/rejected": -1.5268778800964355, |
|
"logps/chosen": -452.11077880859375, |
|
"logps/rejected": -725.6536254882812, |
|
"loss": 0.4537, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9140384197235107, |
|
"rewards/margins": 2.8626558780670166, |
|
"rewards/rejected": -4.776694297790527, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 9.866263268327208, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": -1.6612355709075928, |
|
"logits/rejected": -1.623468041419983, |
|
"logps/chosen": -477.5087890625, |
|
"logps/rejected": -704.5999755859375, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.534862518310547, |
|
"rewards/margins": 1.8013521432876587, |
|
"rewards/rejected": -4.336214065551758, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 11.105179609469939, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": -1.6372588872909546, |
|
"logits/rejected": -1.5610383749008179, |
|
"logps/chosen": -448.084228515625, |
|
"logps/rejected": -620.5638427734375, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.273430585861206, |
|
"rewards/margins": 1.8218492269515991, |
|
"rewards/rejected": -4.095280170440674, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 15.432439336051313, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": -1.6299177408218384, |
|
"logits/rejected": -1.5185959339141846, |
|
"logps/chosen": -480.63623046875, |
|
"logps/rejected": -749.26171875, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3645920753479004, |
|
"rewards/margins": 2.5585074424743652, |
|
"rewards/rejected": -4.923099517822266, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 13.295737344758477, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": -1.6487003564834595, |
|
"logits/rejected": -1.616842269897461, |
|
"logps/chosen": -471.357421875, |
|
"logps/rejected": -603.6805419921875, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8420718908309937, |
|
"rewards/margins": 1.3825440406799316, |
|
"rewards/rejected": -3.2246158123016357, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 11.019669520177573, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": -1.641543984413147, |
|
"logits/rejected": -1.5944894552230835, |
|
"logps/chosen": -602.7939453125, |
|
"logps/rejected": -756.920166015625, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.417752504348755, |
|
"rewards/margins": 1.6268641948699951, |
|
"rewards/rejected": -4.04461669921875, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": -1.5951924324035645, |
|
"eval_logits/rejected": -1.5356674194335938, |
|
"eval_logps/chosen": -472.6584167480469, |
|
"eval_logps/rejected": -691.1746215820312, |
|
"eval_loss": 0.4801708161830902, |
|
"eval_rewards/accuracies": 0.7928571701049805, |
|
"eval_rewards/chosen": -2.003549337387085, |
|
"eval_rewards/margins": 2.17158842086792, |
|
"eval_rewards/rejected": -4.175137996673584, |
|
"eval_runtime": 45.9357, |
|
"eval_samples_per_second": 97.114, |
|
"eval_steps_per_second": 1.524, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 11.233710733457718, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": -1.6332000494003296, |
|
"logits/rejected": -1.5429205894470215, |
|
"logps/chosen": -419.66815185546875, |
|
"logps/rejected": -734.4024658203125, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.8838298320770264, |
|
"rewards/margins": 3.1948580741882324, |
|
"rewards/rejected": -5.078688144683838, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 15.328579684902264, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": -1.670598030090332, |
|
"logits/rejected": -1.6298954486846924, |
|
"logps/chosen": -588.2853393554688, |
|
"logps/rejected": -813.2299194335938, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.263362407684326, |
|
"rewards/margins": 2.3236560821533203, |
|
"rewards/rejected": -4.5870184898376465, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 13.812423991968044, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": -1.752912163734436, |
|
"logits/rejected": -1.669989824295044, |
|
"logps/chosen": -523.4584350585938, |
|
"logps/rejected": -659.4483642578125, |
|
"loss": 0.4615, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.229353427886963, |
|
"rewards/margins": 1.3204247951507568, |
|
"rewards/rejected": -3.549778699874878, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 19.733776466762233, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": -1.619080901145935, |
|
"logits/rejected": -1.4254459142684937, |
|
"logps/chosen": -500.8919982910156, |
|
"logps/rejected": -871.8958740234375, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.26896595954895, |
|
"rewards/margins": 3.7620201110839844, |
|
"rewards/rejected": -6.030986309051514, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 13.902504420645041, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": -1.6571025848388672, |
|
"logits/rejected": -1.6325149536132812, |
|
"logps/chosen": -482.8365783691406, |
|
"logps/rejected": -686.3687744140625, |
|
"loss": 0.4742, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.980038046836853, |
|
"rewards/margins": 1.896106481552124, |
|
"rewards/rejected": -3.8761448860168457, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 10.386888230304015, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": -1.6559547185897827, |
|
"logits/rejected": -1.545379638671875, |
|
"logps/chosen": -393.8249206542969, |
|
"logps/rejected": -653.940185546875, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7048404216766357, |
|
"rewards/margins": 2.559105396270752, |
|
"rewards/rejected": -4.263945579528809, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 13.972760200119874, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": -1.4547879695892334, |
|
"logits/rejected": -1.3284496068954468, |
|
"logps/chosen": -491.6461486816406, |
|
"logps/rejected": -743.1737670898438, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.368063449859619, |
|
"rewards/margins": 2.6352427005767822, |
|
"rewards/rejected": -5.0033063888549805, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 11.880689150494451, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": -1.5535688400268555, |
|
"logits/rejected": -1.5120995044708252, |
|
"logps/chosen": -489.68719482421875, |
|
"logps/rejected": -716.9655151367188, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1152749061584473, |
|
"rewards/margins": 2.0652334690093994, |
|
"rewards/rejected": -4.180508613586426, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 18.79541252367771, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": -1.630657434463501, |
|
"logits/rejected": -1.5946576595306396, |
|
"logps/chosen": -391.0518493652344, |
|
"logps/rejected": -655.5036010742188, |
|
"loss": 0.4848, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7571265697479248, |
|
"rewards/margins": 2.442540407180786, |
|
"rewards/rejected": -4.199666976928711, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 18.62380663413524, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": -1.5710262060165405, |
|
"logits/rejected": -1.495965838432312, |
|
"logps/chosen": -468.63946533203125, |
|
"logps/rejected": -698.932373046875, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.210425853729248, |
|
"rewards/margins": 2.375892162322998, |
|
"rewards/rejected": -4.586318016052246, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 10.138418324575126, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": -1.5962473154067993, |
|
"logits/rejected": -1.61128830909729, |
|
"logps/chosen": -506.21630859375, |
|
"logps/rejected": -674.5911865234375, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.2012012004852295, |
|
"rewards/margins": 1.3675811290740967, |
|
"rewards/rejected": -3.568782091140747, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 11.981782992200987, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": -1.6453485488891602, |
|
"logits/rejected": -1.6295568943023682, |
|
"logps/chosen": -540.22607421875, |
|
"logps/rejected": -574.7904663085938, |
|
"loss": 0.4926, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1138644218444824, |
|
"rewards/margins": 0.7879716157913208, |
|
"rewards/rejected": -2.9018359184265137, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 15.974370111697047, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": -1.632550835609436, |
|
"logits/rejected": -1.550283670425415, |
|
"logps/chosen": -528.643310546875, |
|
"logps/rejected": -716.7777709960938, |
|
"loss": 0.49, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1183180809020996, |
|
"rewards/margins": 2.1329312324523926, |
|
"rewards/rejected": -4.25124979019165, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 15.469000968932196, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": -1.6695518493652344, |
|
"logits/rejected": -1.5455235242843628, |
|
"logps/chosen": -510.795654296875, |
|
"logps/rejected": -794.1751708984375, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.969916582107544, |
|
"rewards/margins": 3.434035539627075, |
|
"rewards/rejected": -5.403952121734619, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 13.801035998962295, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": -1.7181600332260132, |
|
"logits/rejected": -1.696080207824707, |
|
"logps/chosen": -485.26495361328125, |
|
"logps/rejected": -656.0448608398438, |
|
"loss": 0.4756, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.976780652999878, |
|
"rewards/margins": 1.6538575887680054, |
|
"rewards/rejected": -3.6306381225585938, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 11.770076455666835, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": -1.54105544090271, |
|
"logits/rejected": -1.4928066730499268, |
|
"logps/chosen": -425.23748779296875, |
|
"logps/rejected": -606.849853515625, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8429193496704102, |
|
"rewards/margins": 1.6649690866470337, |
|
"rewards/rejected": -3.5078887939453125, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5327433187535995, |
|
"train_runtime": 4228.4167, |
|
"train_samples_per_second": 31.541, |
|
"train_steps_per_second": 0.986 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|