{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984301412872841,
  "eval_steps": 100,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 0.400390625,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -2.2547454833984375,
      "logits/rejected": -2.401865005493164,
      "logps/chosen": -53.759212493896484,
      "logps/rejected": -48.83185958862305,
      "loss": 0.6931,
      "pred_label": 0.0,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1,
      "use_label": 10.0
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.4609375,
      "learning_rate": 1.0416666666666667e-06,
      "logits/chosen": -2.2421748638153076,
      "logits/rejected": -2.2769579887390137,
      "logps/chosen": -51.987098693847656,
      "logps/rejected": -64.96717071533203,
      "loss": 0.6929,
      "pred_label": 0.0,
      "rewards/accuracies": 0.25,
      "rewards/chosen": 0.0019227324519306421,
      "rewards/margins": 0.0004911368596367538,
      "rewards/rejected": 0.0014315954176709056,
      "step": 10,
      "use_label": 90.0
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.39453125,
      "learning_rate": 2.0833333333333334e-06,
      "logits/chosen": -2.2521612644195557,
      "logits/rejected": -2.255767822265625,
      "logps/chosen": -62.4937629699707,
      "logps/rejected": -72.63874816894531,
      "loss": 0.6919,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": 0.01600126549601555,
      "rewards/margins": 0.0011427802965044975,
      "rewards/rejected": 0.0148584870621562,
      "step": 20,
      "use_label": 242.0
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.51171875,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.3423426151275635,
      "logits/rejected": -2.3549609184265137,
      "logps/chosen": -79.10475158691406,
      "logps/rejected": -98.8157958984375,
      "loss": 0.6897,
      "pred_label": 0.0,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": 0.03137165680527687,
      "rewards/margins": 0.0032712810207158327,
      "rewards/rejected": 0.028100375086069107,
      "step": 30,
      "use_label": 402.0
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.51953125,
      "learning_rate": 4.166666666666667e-06,
      "logits/chosen": -2.323338270187378,
      "logits/rejected": -2.3015079498291016,
      "logps/chosen": -82.85453796386719,
      "logps/rejected": -82.39984893798828,
      "loss": 0.6866,
      "pred_label": 0.0,
      "rewards/accuracies": 0.2874999940395355,
      "rewards/chosen": 0.03337595611810684,
      "rewards/margins": 0.011919925920665264,
      "rewards/rejected": 0.021456023678183556,
      "step": 40,
      "use_label": 562.0
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.671875,
      "learning_rate": 4.999731868769027e-06,
      "logits/chosen": -2.2404515743255615,
      "logits/rejected": -2.262972354888916,
      "logps/chosen": -67.89888000488281,
      "logps/rejected": -81.8695068359375,
      "loss": 0.6805,
      "pred_label": 0.0,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": 0.009319942444562912,
      "rewards/margins": 0.030618786811828613,
      "rewards/rejected": -0.0212988443672657,
      "step": 50,
      "use_label": 722.0
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0234375,
      "learning_rate": 4.9903533134293035e-06,
      "logits/chosen": -2.2157275676727295,
      "logits/rejected": -2.155928134918213,
      "logps/chosen": -63.64031982421875,
      "logps/rejected": -73.28236389160156,
      "loss": 0.6752,
      "pred_label": 0.0,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": -0.03914070501923561,
      "rewards/margins": 0.04399287328124046,
      "rewards/rejected": -0.08313358575105667,
      "step": 60,
      "use_label": 882.0
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.859375,
      "learning_rate": 4.967625656594782e-06,
      "logits/chosen": -2.114478588104248,
      "logits/rejected": -2.1126065254211426,
      "logps/chosen": -70.76527404785156,
      "logps/rejected": -83.94652557373047,
      "loss": 0.6712,
      "pred_label": 0.0,
      "rewards/accuracies": 0.25,
      "rewards/chosen": -0.15054164826869965,
      "rewards/margins": 0.030909737572073936,
      "rewards/rejected": -0.18145139515399933,
      "step": 70,
      "use_label": 1042.0
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1640625,
      "learning_rate": 4.93167072587771e-06,
      "logits/chosen": -2.2166943550109863,
      "logits/rejected": -2.1609182357788086,
      "logps/chosen": -54.8065185546875,
      "logps/rejected": -69.45613861083984,
      "loss": 0.6589,
      "pred_label": 0.4749999940395355,
      "rewards/accuracies": 0.26249998807907104,
      "rewards/chosen": -0.06275613605976105,
      "rewards/margins": 0.10003063827753067,
      "rewards/rejected": -0.16278676688671112,
      "step": 80,
      "use_label": 1201.5250244140625
    },
    {
      "epoch": 0.19,
      "grad_norm": 1.8125,
      "learning_rate": 4.882681251368549e-06,
      "logits/chosen": -1.9692049026489258,
      "logits/rejected": -1.9792039394378662,
      "logps/chosen": -76.60871887207031,
      "logps/rejected": -96.53330993652344,
      "loss": 0.6564,
      "pred_label": 2.0999999046325684,
      "rewards/accuracies": 0.29374998807907104,
      "rewards/chosen": -0.18226662278175354,
      "rewards/margins": 0.09542477130889893,
      "rewards/rejected": -0.27769142389297485,
      "step": 90,
      "use_label": 1359.9000244140625
    },
    {
      "epoch": 0.21,
      "grad_norm": 2.171875,
      "learning_rate": 4.8209198325401815e-06,
      "logits/chosen": -1.9027693271636963,
      "logits/rejected": -1.8775581121444702,
      "logps/chosen": -92.94733428955078,
      "logps/rejected": -84.73824310302734,
      "loss": 0.6531,
      "pred_label": 4.0,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.12917451560497284,
      "rewards/margins": 0.07954015582799911,
      "rewards/rejected": -0.20871467888355255,
      "step": 100,
      "use_label": 1518.0
    },
    {
      "epoch": 0.21,
      "eval_logits/chosen": -1.7353737354278564,
      "eval_logits/rejected": -1.7198325395584106,
      "eval_logps/chosen": -80.33845520019531,
      "eval_logps/rejected": -106.64702606201172,
      "eval_loss": 0.6527961492538452,
      "eval_pred_label": 6.6875,
      "eval_rewards/accuracies": 0.36328125,
      "eval_rewards/chosen": -0.1642620712518692,
      "eval_rewards/margins": 0.13027876615524292,
      "eval_rewards/rejected": -0.2945408225059509,
      "eval_runtime": 125.2319,
      "eval_samples_per_second": 15.97,
      "eval_steps_per_second": 0.256,
      "eval_use_label": 1725.3125,
      "step": 100
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.0,
      "learning_rate": 4.746717530629565e-06,
      "logits/chosen": -1.7974278926849365,
      "logits/rejected": -1.7697474956512451,
      "logps/chosen": -89.79286193847656,
      "logps/rejected": -113.6241455078125,
      "loss": 0.6479,
      "pred_label": 9.199999809265137,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.18692079186439514,
      "rewards/margins": 0.16341358423233032,
      "rewards/rejected": -0.3503343462944031,
      "step": 110,
      "use_label": 1928.800048828125
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.890625,
      "learning_rate": 4.660472094042121e-06,
      "logits/chosen": -1.454304814338684,
      "logits/rejected": -1.3457725048065186,
      "logps/chosen": -109.3675537109375,
      "logps/rejected": -133.90725708007812,
      "loss": 0.6432,
      "pred_label": 14.949999809265137,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.3942197263240814,
      "rewards/margins": 0.21566259860992432,
      "rewards/rejected": -0.6098822951316833,
      "step": 120,
      "use_label": 2083.050048828125
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.5625,
      "learning_rate": 4.5626458262912745e-06,
      "logits/chosen": -1.0859026908874512,
      "logits/rejected": -1.0426993370056152,
      "logps/chosen": -112.0394515991211,
      "logps/rejected": -139.61097717285156,
      "loss": 0.6391,
      "pred_label": 21.049999237060547,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.4626106321811676,
      "rewards/margins": 0.20503444969654083,
      "rewards/rejected": -0.6676451563835144,
      "step": 130,
      "use_label": 2236.949951171875
    },
    {
      "epoch": 0.29,
      "grad_norm": 2.484375,
      "learning_rate": 4.453763107901676e-06,
      "logits/chosen": -0.735418975353241,
      "logits/rejected": -0.8380192518234253,
      "logps/chosen": -138.07081604003906,
      "logps/rejected": -150.91665649414062,
      "loss": 0.6252,
      "pred_label": 31.399999618530273,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.5732325315475464,
      "rewards/margins": 0.1448771208524704,
      "rewards/rejected": -0.7181096076965332,
      "step": 140,
      "use_label": 2386.60009765625
    },
    {
      "epoch": 0.31,
      "grad_norm": 3.859375,
      "learning_rate": 4.33440758555951e-06,
      "logits/chosen": -0.48231878876686096,
      "logits/rejected": -0.43882569670677185,
      "logps/chosen": -117.69664001464844,
      "logps/rejected": -150.86083984375,
      "loss": 0.6219,
      "pred_label": 43.45000076293945,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.5254992246627808,
      "rewards/margins": 0.3047201633453369,
      "rewards/rejected": -0.8302194476127625,
      "step": 150,
      "use_label": 2534.550048828125
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.890625,
      "learning_rate": 4.205219043576955e-06,
      "logits/chosen": -0.15186011791229248,
      "logits/rejected": -0.17336201667785645,
      "logps/chosen": -128.78500366210938,
      "logps/rejected": -159.26498413085938,
      "loss": 0.5982,
      "pred_label": 58.25,
      "rewards/accuracies": 0.2750000059604645,
      "rewards/chosen": -0.6445494294166565,
      "rewards/margins": 0.17397476732730865,
      "rewards/rejected": -0.818524181842804,
      "step": 160,
      "use_label": 2679.75
    },
    {
      "epoch": 0.36,
      "grad_norm": 3.328125,
      "learning_rate": 4.066889974440757e-06,
      "logits/chosen": 0.14322622120380402,
      "logits/rejected": 0.18100713193416595,
      "logps/chosen": -108.39127349853516,
      "logps/rejected": -140.55824279785156,
      "loss": 0.5938,
      "pred_label": 79.57499694824219,
      "rewards/accuracies": 0.3062500059604645,
      "rewards/chosen": -0.5288320779800415,
      "rewards/margins": 0.23454061150550842,
      "rewards/rejected": -0.7633727192878723,
      "step": 170,
      "use_label": 2818.425048828125
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.0,
      "learning_rate": 3.92016186682789e-06,
      "logits/chosen": -0.20601686835289001,
      "logits/rejected": -0.09364790469408035,
      "logps/chosen": -105.94217681884766,
      "logps/rejected": -130.695556640625,
      "loss": 0.6262,
      "pred_label": 100.2750015258789,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.4315454065799713,
      "rewards/margins": 0.3125666677951813,
      "rewards/rejected": -0.7441121339797974,
      "step": 180,
      "use_label": 2957.72509765625
    },
    {
      "epoch": 0.4,
      "grad_norm": 2.734375,
      "learning_rate": 3.7658212309857576e-06,
      "logits/chosen": -0.34412023425102234,
      "logits/rejected": -0.07299783080816269,
      "logps/chosen": -107.5626449584961,
      "logps/rejected": -141.1322479248047,
      "loss": 0.6092,
      "pred_label": 121.05000305175781,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.48451024293899536,
      "rewards/margins": 0.28280580043792725,
      "rewards/rejected": -0.7673160433769226,
      "step": 190,
      "use_label": 3096.949951171875
    },
    {
      "epoch": 0.42,
      "grad_norm": 6.5,
      "learning_rate": 3.604695382782159e-06,
      "logits/chosen": 0.03128425031900406,
      "logits/rejected": 0.20205454528331757,
      "logps/chosen": -145.35342407226562,
      "logps/rejected": -162.05667114257812,
      "loss": 0.6041,
      "pred_label": 135.89999389648438,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.6367710828781128,
      "rewards/margins": 0.25234952569007874,
      "rewards/rejected": -0.8891205787658691,
      "step": 200,
      "use_label": 3242.10009765625
    },
    {
      "epoch": 0.42,
      "eval_logits/chosen": 0.886444091796875,
      "eval_logits/rejected": 0.9784458875656128,
      "eval_logps/chosen": -135.34742736816406,
      "eval_logps/rejected": -187.65963745117188,
      "eval_loss": 0.5936154723167419,
      "eval_pred_label": 167.40625,
      "eval_rewards/accuracies": 0.3515625,
      "eval_rewards/chosen": -0.7143516540527344,
      "eval_rewards/margins": 0.3903152644634247,
      "eval_rewards/rejected": -1.1046667098999023,
      "eval_runtime": 125.3006,
      "eval_samples_per_second": 15.962,
      "eval_steps_per_second": 0.255,
      "eval_use_label": 3420.59375,
      "step": 200
    },
    {
      "epoch": 0.44,
      "grad_norm": 3.796875,
      "learning_rate": 3.437648009023905e-06,
      "logits/chosen": 0.6729141473770142,
      "logits/rejected": 0.6579598188400269,
      "logps/chosen": -119.19351959228516,
      "logps/rejected": -159.00997924804688,
      "loss": 0.5936,
      "pred_label": 201.6999969482422,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": -0.63218754529953,
      "rewards/margins": 0.3281194567680359,
      "rewards/rejected": -0.9603070020675659,
      "step": 210,
      "use_label": 3592.300048828125
    },
    {
      "epoch": 0.46,
      "grad_norm": 4.5,
      "learning_rate": 3.265574537815398e-06,
      "logits/chosen": 0.2854166626930237,
      "logits/rejected": 0.4488348066806793,
      "logps/chosen": -148.92379760742188,
      "logps/rejected": -161.19557189941406,
      "loss": 0.5938,
      "pred_label": 225.52499389648438,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.6929510235786438,
      "rewards/margins": 0.2553243637084961,
      "rewards/rejected": -0.9482753872871399,
      "step": 220,
      "use_label": 3728.47509765625
    },
    {
      "epoch": 0.48,
      "grad_norm": 2.28125,
      "learning_rate": 3.089397338773569e-06,
      "logits/chosen": 0.00020002425299026072,
      "logits/rejected": 0.1493436098098755,
      "logps/chosen": -103.05213928222656,
      "logps/rejected": -136.05099487304688,
      "loss": 0.597,
      "pred_label": 252.02499389648438,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.3861756920814514,
      "rewards/margins": 0.3467464745044708,
      "rewards/rejected": -0.7329221963882446,
      "step": 230,
      "use_label": 3861.97509765625
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.125,
      "learning_rate": 2.9100607788275547e-06,
      "logits/chosen": 0.49308425188064575,
      "logits/rejected": 0.44487372040748596,
      "logps/chosen": -109.46275329589844,
      "logps/rejected": -153.8666534423828,
      "loss": 0.584,
      "pred_label": 275.375,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.40430212020874023,
      "rewards/margins": 0.3921273946762085,
      "rewards/rejected": -0.7964295148849487,
      "step": 240,
      "use_label": 3998.625
    },
    {
      "epoch": 0.52,
      "grad_norm": 2.0625,
      "learning_rate": 2.72852616010567e-06,
      "logits/chosen": 0.3891890347003937,
      "logits/rejected": 0.47166162729263306,
      "logps/chosen": -122.5915298461914,
      "logps/rejected": -153.12493896484375,
      "loss": 0.5998,
      "pred_label": 301.875,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.4919242262840271,
      "rewards/margins": 0.3470008671283722,
      "rewards/rejected": -0.8389250636100769,
      "step": 250,
      "use_label": 4132.125
    },
    {
      "epoch": 0.54,
      "grad_norm": 2.171875,
      "learning_rate": 2.5457665670441937e-06,
      "logits/chosen": 0.4214790463447571,
      "logits/rejected": 0.4202333092689514,
      "logps/chosen": -116.09378814697266,
      "logps/rejected": -156.8458251953125,
      "loss": 0.592,
      "pred_label": 326.3500061035156,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.4998815953731537,
      "rewards/margins": 0.324097216129303,
      "rewards/rejected": -0.8239787817001343,
      "step": 260,
      "use_label": 4267.64990234375
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.40625,
      "learning_rate": 2.3627616503391813e-06,
      "logits/chosen": 0.9609361886978149,
      "logits/rejected": 0.8760908246040344,
      "logps/chosen": -142.81573486328125,
      "logps/rejected": -170.10379028320312,
      "loss": 0.5888,
      "pred_label": 343.04998779296875,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.698999285697937,
      "rewards/margins": 0.30828553438186646,
      "rewards/rejected": -1.0072848796844482,
      "step": 270,
      "use_label": 4410.9501953125
    },
    {
      "epoch": 0.59,
      "grad_norm": 2.234375,
      "learning_rate": 2.1804923757009885e-06,
      "logits/chosen": 1.1657536029815674,
      "logits/rejected": 1.3259608745574951,
      "logps/chosen": -131.4703826904297,
      "logps/rejected": -156.4979248046875,
      "loss": 0.6007,
      "pred_label": 361.5,
      "rewards/accuracies": 0.3375000059604645,
      "rewards/chosen": -0.6596485376358032,
      "rewards/margins": 0.27613458037376404,
      "rewards/rejected": -0.9357832074165344,
      "step": 280,
      "use_label": 4552.5
    },
    {
      "epoch": 0.61,
      "grad_norm": 2.875,
      "learning_rate": 1.9999357655598894e-06,
      "logits/chosen": 0.9594011306762695,
      "logits/rejected": 0.9126796722412109,
      "logps/chosen": -144.55104064941406,
      "logps/rejected": -183.51065063476562,
      "loss": 0.5899,
      "pred_label": 386.07501220703125,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.7800258994102478,
      "rewards/margins": 0.2914672791957855,
      "rewards/rejected": -1.0714932680130005,
      "step": 290,
      "use_label": 4687.9248046875
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.1875,
      "learning_rate": 1.8220596619089576e-06,
      "logits/chosen": 1.2753574848175049,
      "logits/rejected": 1.1057071685791016,
      "logps/chosen": -165.4674072265625,
      "logps/rejected": -223.6466064453125,
      "loss": 0.5763,
      "pred_label": 409.9750061035156,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.8787307739257812,
      "rewards/margins": 0.41653138399124146,
      "rewards/rejected": -1.295262098312378,
      "step": 300,
      "use_label": 4824.02490234375
    },
    {
      "epoch": 0.63,
      "eval_logits/chosen": 1.6598718166351318,
      "eval_logits/rejected": 1.7526323795318604,
      "eval_logps/chosen": -143.2136993408203,
      "eval_logps/rejected": -200.36146545410156,
      "eval_loss": 0.5773172974586487,
      "eval_pred_label": 452.71875,
      "eval_rewards/accuracies": 0.3515625,
      "eval_rewards/chosen": -0.7930145263671875,
      "eval_rewards/margins": 0.4386705756187439,
      "eval_rewards/rejected": -1.2316851615905762,
      "eval_runtime": 125.3512,
      "eval_samples_per_second": 15.955,
      "eval_steps_per_second": 0.255,
      "eval_use_label": 4991.28125,
      "step": 300
    },
    {
      "epoch": 0.65,
      "grad_norm": 3.0625,
      "learning_rate": 1.647817538357072e-06,
      "logits/chosen": 1.3793504238128662,
      "logits/rejected": 1.4072078466415405,
      "logps/chosen": -126.9173583984375,
      "logps/rejected": -186.46255493164062,
      "loss": 0.5633,
      "pred_label": 494.42498779296875,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.7132076025009155,
      "rewards/margins": 0.4689141809940338,
      "rewards/rejected": -1.1821218729019165,
      "step": 310,
      "use_label": 5155.5751953125
    },
    {
      "epoch": 0.67,
      "grad_norm": 4.21875,
      "learning_rate": 1.4781433892011132e-06,
      "logits/chosen": 1.2615296840667725,
      "logits/rejected": 1.4717950820922852,
      "logps/chosen": -163.67529296875,
      "logps/rejected": -205.421142578125,
      "loss": 0.5761,
      "pred_label": 523.5250244140625,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": -0.9060484766960144,
      "rewards/margins": 0.4762052893638611,
      "rewards/rejected": -1.382253885269165,
      "step": 320,
      "use_label": 5286.47509765625
    },
    {
      "epoch": 0.69,
      "grad_norm": 2.359375,
      "learning_rate": 1.3139467229135999e-06,
      "logits/chosen": 1.4169238805770874,
      "logits/rejected": 1.4296729564666748,
      "logps/chosen": -150.2149200439453,
      "logps/rejected": -186.73570251464844,
      "loss": 0.5799,
      "pred_label": 550.125,
      "rewards/accuracies": 0.33125001192092896,
      "rewards/chosen": -0.8010675311088562,
      "rewards/margins": 0.3802093267440796,
      "rewards/rejected": -1.1812770366668701,
      "step": 330,
      "use_label": 5419.875
    },
    {
      "epoch": 0.71,
      "grad_norm": 3.15625,
      "learning_rate": 1.1561076868822756e-06,
      "logits/chosen": 0.9984269142150879,
      "logits/rejected": 0.9373771548271179,
      "logps/chosen": -161.85842895507812,
      "logps/rejected": -182.74703979492188,
      "loss": 0.5933,
      "pred_label": 567.2000122070312,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.771192193031311,
      "rewards/margins": 0.2911759614944458,
      "rewards/rejected": -1.0623681545257568,
      "step": 340,
      "use_label": 5562.7998046875
    },
    {
      "epoch": 0.73,
      "grad_norm": 2.59375,
      "learning_rate": 1.0054723495346484e-06,
      "logits/chosen": 0.83796626329422,
      "logits/rejected": 0.8520887494087219,
      "logps/chosen": -176.03054809570312,
      "logps/rejected": -217.10214233398438,
      "loss": 0.5863,
      "pred_label": 598.5750122070312,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.86613929271698,
      "rewards/margins": 0.4522012174129486,
      "rewards/rejected": -1.318340539932251,
      "step": 350,
      "use_label": 5691.4248046875
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.234375,
      "learning_rate": 8.628481651367876e-07,
      "logits/chosen": 0.7010875940322876,
      "logits/rejected": 0.8413160443305969,
      "logps/chosen": -126.9655532836914,
      "logps/rejected": -182.5807342529297,
      "loss": 0.5885,
      "pred_label": 629.0,
      "rewards/accuracies": 0.39375001192092896,
      "rewards/chosen": -0.6332792043685913,
      "rewards/margins": 0.47024598717689514,
      "rewards/rejected": -1.103525161743164,
      "step": 360,
      "use_label": 5821.0
    },
    {
      "epoch": 0.77,
      "grad_norm": 2.4375,
      "learning_rate": 7.289996455765749e-07,
      "logits/chosen": 0.8454801440238953,
      "logits/rejected": 0.9659041166305542,
      "logps/chosen": -120.26502990722656,
      "logps/rejected": -170.44923400878906,
      "loss": 0.585,
      "pred_label": 655.625,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.5483022928237915,
      "rewards/margins": 0.4770358204841614,
      "rewards/rejected": -1.0253381729125977,
      "step": 370,
      "use_label": 5954.375
    },
    {
      "epoch": 0.8,
      "grad_norm": 3.5625,
      "learning_rate": 6.046442623320145e-07,
      "logits/chosen": 0.7346574664115906,
      "logits/rejected": 0.7028430104255676,
      "logps/chosen": -131.0785675048828,
      "logps/rejected": -188.57435607910156,
      "loss": 0.589,
      "pred_label": 685.5250244140625,
      "rewards/accuracies": 0.3187499940395355,
      "rewards/chosen": -0.6524317264556885,
      "rewards/margins": 0.3696710765361786,
      "rewards/rejected": -1.0221028327941895,
      "step": 380,
      "use_label": 6084.47509765625
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.3125,
      "learning_rate": 4.904486005914027e-07,
      "logits/chosen": 1.1143369674682617,
      "logits/rejected": 0.8643951416015625,
      "logps/chosen": -179.11276245117188,
      "logps/rejected": -220.11068725585938,
      "loss": 0.5727,
      "pred_label": 717.625,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.8629444241523743,
      "rewards/margins": 0.508463442325592,
      "rewards/rejected": -1.3714077472686768,
      "step": 390,
      "use_label": 6212.375
    },
    {
      "epoch": 0.84,
      "grad_norm": 3.25,
      "learning_rate": 3.8702478614051353e-07,
      "logits/chosen": 0.8043449521064758,
      "logits/rejected": 0.9917415380477905,
      "logps/chosen": -130.07017517089844,
      "logps/rejected": -163.469970703125,
      "loss": 0.5836,
      "pred_label": 747.5750122070312,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.5757918953895569,
      "rewards/margins": 0.42427974939346313,
      "rewards/rejected": -1.0000715255737305,
      "step": 400,
      "use_label": 6342.4248046875
    },
    {
      "epoch": 0.84,
      "eval_logits/chosen": 1.75760817527771,
      "eval_logits/rejected": 1.8499951362609863,
      "eval_logps/chosen": -130.3719482421875,
      "eval_logps/rejected": -190.7267303466797,
      "eval_loss": 0.5768851041793823,
      "eval_pred_label": 782.8125,
      "eval_rewards/accuracies": 0.37109375,
      "eval_rewards/chosen": -0.6645968556404114,
      "eval_rewards/margins": 0.4707409739494324,
      "eval_rewards/rejected": -1.1353378295898438,
      "eval_runtime": 147.391,
      "eval_samples_per_second": 13.569,
      "eval_steps_per_second": 0.217,
      "eval_use_label": 6517.1875,
      "step": 400
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.390625,
      "learning_rate": 2.9492720416985004e-07,
      "logits/chosen": 1.1002473831176758,
      "logits/rejected": 1.1428117752075195,
      "logps/chosen": -126.85247802734375,
      "logps/rejected": -170.77365112304688,
      "loss": 0.5838,
      "pred_label": 822.75,
      "rewards/accuracies": 0.3687500059604645,
      "rewards/chosen": -0.6542948484420776,
      "rewards/margins": 0.4562492370605469,
      "rewards/rejected": -1.1105440855026245,
      "step": 410,
      "use_label": 6683.25
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.609375,
      "learning_rate": 2.1464952759020857e-07,
      "logits/chosen": 1.3246395587921143,
      "logits/rejected": 1.2824434041976929,
      "logps/chosen": -122.80003356933594,
      "logps/rejected": -138.56423950195312,
      "loss": 0.5822,
      "pred_label": 846.4249877929688,
      "rewards/accuracies": 0.26249998807907104,
      "rewards/chosen": -0.6186091303825378,
      "rewards/margins": 0.25320303440093994,
      "rewards/rejected": -0.8718121647834778,
      "step": 420,
      "use_label": 6819.5751953125
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.09375,
      "learning_rate": 1.4662207078575685e-07,
      "logits/chosen": 1.270193099975586,
      "logits/rejected": 1.253873348236084,
      "logps/chosen": -171.46336364746094,
      "logps/rejected": -207.75607299804688,
      "loss": 0.564,
      "pred_label": 873.5499877929688,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -0.7219651341438293,
      "rewards/margins": 0.5620242357254028,
      "rewards/rejected": -1.283989429473877,
      "step": 430,
      "use_label": 6952.4501953125
    },
    {
      "epoch": 0.92,
      "grad_norm": 2.578125,
      "learning_rate": 9.120948298936422e-08,
      "logits/chosen": 1.221411943435669,
      "logits/rejected": 1.397247552871704,
      "logps/chosen": -136.4575653076172,
      "logps/rejected": -193.40870666503906,
      "loss": 0.5736,
      "pred_label": 905.5750122070312,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.6955360770225525,
      "rewards/margins": 0.4879213869571686,
      "rewards/rejected": -1.183457612991333,
      "step": 440,
      "use_label": 7080.4248046875
    },
    {
      "epoch": 0.94,
      "grad_norm": 4.78125,
      "learning_rate": 4.870879364444109e-08,
      "logits/chosen": 1.6054052114486694,
      "logits/rejected": 1.3484258651733398,
      "logps/chosen": -148.17161560058594,
      "logps/rejected": -205.789306640625,
      "loss": 0.583,
      "pred_label": 930.4749755859375,
      "rewards/accuracies": 0.35624998807907104,
      "rewards/chosen": -0.7591380476951599,
      "rewards/margins": 0.4158584177494049,
      "rewards/rejected": -1.1749964952468872,
      "step": 450,
      "use_label": 7215.52490234375
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.875,
      "learning_rate": 1.93478202307823e-08,
      "logits/chosen": 1.4640157222747803,
      "logits/rejected": 1.4903802871704102,
      "logps/chosen": -96.6323471069336,
      "logps/rejected": -150.8868865966797,
      "loss": 0.5814,
      "pred_label": 961.25,
      "rewards/accuracies": 0.32499998807907104,
      "rewards/chosen": -0.5051247477531433,
      "rewards/margins": 0.3702928125858307,
      "rewards/rejected": -0.8754175901412964,
      "step": 460,
      "use_label": 7344.75
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.75,
      "learning_rate": 3.283947088983663e-09,
      "logits/chosen": 1.464422345161438,
      "logits/rejected": 1.2297132015228271,
      "logps/chosen": -130.30838012695312,
      "logps/rejected": -166.67605590820312,
      "loss": 0.5822,
      "pred_label": 982.8499755859375,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.6297920346260071,
      "rewards/margins": 0.34639838337898254,
      "rewards/rejected": -0.9761903882026672,
      "step": 470,
      "use_label": 7483.14990234375
    },
    {
      "epoch": 1.0,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 0.6110695428068533,
      "train_runtime": 9999.3279,
      "train_samples_per_second": 6.114,
      "train_steps_per_second": 0.048
    }
  ],
  "logging_steps": 10,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}