|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 252, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05952380952380952, |
|
"grad_norm": 1882.16845703125, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"log_odds_chosen": -0.12500545382499695, |
|
"log_odds_ratio": -0.9542725682258606, |
|
"logits/chosen": 164.27560424804688, |
|
"logits/rejected": 208.2156219482422, |
|
"logps/chosen": -14.962623596191406, |
|
"logps/rejected": -14.837625503540039, |
|
"loss": 15.2102, |
|
"nll_loss": 14.645106315612793, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -7.481311798095703, |
|
"rewards/margins": -0.06250032037496567, |
|
"rewards/rejected": -7.4188127517700195, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.11904761904761904, |
|
"grad_norm": 1088.200927734375, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": -0.05857907608151436, |
|
"log_odds_ratio": -1.1228755712509155, |
|
"logits/chosen": 244.7198486328125, |
|
"logits/rejected": 227.472412109375, |
|
"logps/chosen": -13.305212020874023, |
|
"logps/rejected": -13.2466402053833, |
|
"loss": 13.2102, |
|
"nll_loss": 12.955018997192383, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -6.652606010437012, |
|
"rewards/margins": -0.02928643301129341, |
|
"rewards/rejected": -6.62332010269165, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.17857142857142858, |
|
"grad_norm": 750.9570922851562, |
|
"learning_rate": 7.5e-07, |
|
"log_odds_chosen": -0.21123185753822327, |
|
"log_odds_ratio": -0.9812790751457214, |
|
"logits/chosen": 247.1725311279297, |
|
"logits/rejected": 319.17498779296875, |
|
"logps/chosen": -8.516304969787598, |
|
"logps/rejected": -8.305025100708008, |
|
"loss": 8.5319, |
|
"nll_loss": 8.219998359680176, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -4.258152484893799, |
|
"rewards/margins": -0.10563965886831284, |
|
"rewards/rejected": -4.152512550354004, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.23809523809523808, |
|
"grad_norm": 150.73776245117188, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": 0.13484135270118713, |
|
"log_odds_ratio": -0.7639249563217163, |
|
"logits/chosen": 232.5557098388672, |
|
"logits/rejected": 278.36358642578125, |
|
"logps/chosen": -5.115365505218506, |
|
"logps/rejected": -5.248563289642334, |
|
"loss": 5.7184, |
|
"nll_loss": 5.363820552825928, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.557682752609253, |
|
"rewards/margins": 0.06659835577011108, |
|
"rewards/rejected": -2.624281644821167, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2976190476190476, |
|
"grad_norm": 132.29954528808594, |
|
"learning_rate": 1.25e-06, |
|
"log_odds_chosen": -0.15154710412025452, |
|
"log_odds_ratio": -0.918846607208252, |
|
"logits/chosen": 329.36016845703125, |
|
"logits/rejected": 322.80316162109375, |
|
"logps/chosen": -3.490060329437256, |
|
"logps/rejected": -3.336357593536377, |
|
"loss": 3.7837, |
|
"nll_loss": 3.5224061012268066, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.745030164718628, |
|
"rewards/margins": -0.07685144990682602, |
|
"rewards/rejected": -1.6681787967681885, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 66.39665985107422, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": 0.1455865204334259, |
|
"log_odds_ratio": -0.7501406669616699, |
|
"logits/chosen": 351.40875244140625, |
|
"logits/rejected": 335.61932373046875, |
|
"logps/chosen": -2.3319332599639893, |
|
"logps/rejected": -2.4487643241882324, |
|
"loss": 2.9334, |
|
"nll_loss": 2.8211100101470947, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1659666299819946, |
|
"rewards/margins": 0.05841563269495964, |
|
"rewards/rejected": -1.2243821620941162, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 50.658451080322266, |
|
"learning_rate": 1.75e-06, |
|
"log_odds_chosen": 0.17109766602516174, |
|
"log_odds_ratio": -0.6774098873138428, |
|
"logits/chosen": 386.70220947265625, |
|
"logits/rejected": 384.7711486816406, |
|
"logps/chosen": -1.935703992843628, |
|
"logps/rejected": -2.0668416023254395, |
|
"loss": 2.3732, |
|
"nll_loss": 2.1755499839782715, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.967851996421814, |
|
"rewards/margins": 0.06556873768568039, |
|
"rewards/rejected": -1.0334208011627197, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 50.109127044677734, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.5132101774215698, |
|
"log_odds_ratio": -0.58674156665802, |
|
"logits/chosen": 395.3487243652344, |
|
"logits/rejected": 396.77911376953125, |
|
"logps/chosen": -1.6742804050445557, |
|
"logps/rejected": -2.092700481414795, |
|
"loss": 2.2549, |
|
"nll_loss": 1.8625695705413818, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8371402025222778, |
|
"rewards/margins": 0.20921015739440918, |
|
"rewards/rejected": -1.0463502407073975, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5357142857142857, |
|
"grad_norm": 36.12651062011719, |
|
"learning_rate": 2.25e-06, |
|
"log_odds_chosen": 0.40647760033607483, |
|
"log_odds_ratio": -0.7087821364402771, |
|
"logits/chosen": 416.9222717285156, |
|
"logits/rejected": 409.3716125488281, |
|
"logps/chosen": -1.7572282552719116, |
|
"logps/rejected": -2.102989435195923, |
|
"loss": 2.1445, |
|
"nll_loss": 1.97689950466156, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8786141276359558, |
|
"rewards/margins": 0.1728806048631668, |
|
"rewards/rejected": -1.0514947175979614, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 129.66111755371094, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.4818713068962097, |
|
"log_odds_ratio": -0.607313871383667, |
|
"logits/chosen": 385.31231689453125, |
|
"logits/rejected": 411.05029296875, |
|
"logps/chosen": -1.5724234580993652, |
|
"logps/rejected": -1.9852949380874634, |
|
"loss": 2.1136, |
|
"nll_loss": 1.7941957712173462, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7862117290496826, |
|
"rewards/margins": 0.20643571019172668, |
|
"rewards/rejected": -0.9926474690437317, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6547619047619048, |
|
"grad_norm": 65.99632263183594, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"log_odds_chosen": 0.3759257197380066, |
|
"log_odds_ratio": -0.658983588218689, |
|
"logits/chosen": 393.38006591796875, |
|
"logits/rejected": 373.91265869140625, |
|
"logps/chosen": -1.5574285984039307, |
|
"logps/rejected": -1.8452409505844116, |
|
"loss": 2.0521, |
|
"nll_loss": 1.9158170223236084, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7787142992019653, |
|
"rewards/margins": 0.14390619099140167, |
|
"rewards/rejected": -0.9226204752922058, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 42.2856559753418, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.6560322642326355, |
|
"log_odds_ratio": -0.5206496119499207, |
|
"logits/chosen": 394.558349609375, |
|
"logits/rejected": 419.30908203125, |
|
"logps/chosen": -1.252516746520996, |
|
"logps/rejected": -1.736289620399475, |
|
"loss": 1.9986, |
|
"nll_loss": 1.6350934505462646, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.626258373260498, |
|
"rewards/margins": 0.2418864220380783, |
|
"rewards/rejected": -0.8681448101997375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7738095238095238, |
|
"grad_norm": 51.25124740600586, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"log_odds_chosen": 0.20805387198925018, |
|
"log_odds_ratio": -0.700042724609375, |
|
"logits/chosen": 386.62237548828125, |
|
"logits/rejected": 375.99847412109375, |
|
"logps/chosen": -1.3062386512756348, |
|
"logps/rejected": -1.4444353580474854, |
|
"loss": 1.9611, |
|
"nll_loss": 1.55000901222229, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.6531193256378174, |
|
"rewards/margins": 0.06909838318824768, |
|
"rewards/rejected": -0.7222176790237427, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 56.384552001953125, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.2420281618833542, |
|
"log_odds_ratio": -0.6607708930969238, |
|
"logits/chosen": 376.65167236328125, |
|
"logits/rejected": 374.722900390625, |
|
"logps/chosen": -1.2764971256256104, |
|
"logps/rejected": -1.4956505298614502, |
|
"loss": 1.9944, |
|
"nll_loss": 1.656867265701294, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6382485628128052, |
|
"rewards/margins": 0.10957670211791992, |
|
"rewards/rejected": -0.7478252649307251, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 35.837562561035156, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"log_odds_chosen": 0.43760427832603455, |
|
"log_odds_ratio": -0.5639179944992065, |
|
"logits/chosen": 369.7068786621094, |
|
"logits/rejected": 380.7843017578125, |
|
"logps/chosen": -1.254392385482788, |
|
"logps/rejected": -1.5828098058700562, |
|
"loss": 1.904, |
|
"nll_loss": 1.5474170446395874, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.627196192741394, |
|
"rewards/margins": 0.16420873999595642, |
|
"rewards/rejected": -0.7914049029350281, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 61.186588287353516, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.20079848170280457, |
|
"log_odds_ratio": -0.7042320370674133, |
|
"logits/chosen": 372.26202392578125, |
|
"logits/rejected": 395.8300476074219, |
|
"logps/chosen": -1.294389009475708, |
|
"logps/rejected": -1.4353959560394287, |
|
"loss": 1.8227, |
|
"nll_loss": 1.4922560453414917, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.647194504737854, |
|
"rewards/margins": 0.07050346583127975, |
|
"rewards/rejected": -0.7176979780197144, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_log_odds_chosen": 0.16461706161499023, |
|
"eval_log_odds_ratio": -0.6902630925178528, |
|
"eval_logits/chosen": 315.19403076171875, |
|
"eval_logits/rejected": 257.8447265625, |
|
"eval_logps/chosen": -1.2099318504333496, |
|
"eval_logps/rejected": -1.348587989807129, |
|
"eval_loss": 1.9615823030471802, |
|
"eval_nll_loss": 1.6718581914901733, |
|
"eval_rewards/accuracies": 0.5, |
|
"eval_rewards/chosen": -0.6049659252166748, |
|
"eval_rewards/margins": 0.06932813674211502, |
|
"eval_rewards/rejected": -0.6742939949035645, |
|
"eval_runtime": 201.4785, |
|
"eval_samples_per_second": 2.745, |
|
"eval_steps_per_second": 0.347, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.0119047619047619, |
|
"grad_norm": 49.942501068115234, |
|
"learning_rate": 4.25e-06, |
|
"log_odds_chosen": 0.3827090859413147, |
|
"log_odds_ratio": -0.5646133422851562, |
|
"logits/chosen": 385.1676330566406, |
|
"logits/rejected": 408.2163391113281, |
|
"logps/chosen": -1.2135608196258545, |
|
"logps/rejected": -1.460314154624939, |
|
"loss": 1.8474, |
|
"nll_loss": 1.4997951984405518, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6067804098129272, |
|
"rewards/margins": 0.1233767420053482, |
|
"rewards/rejected": -0.7301570773124695, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 121.16907501220703, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 0.7085806131362915, |
|
"log_odds_ratio": -0.4863010346889496, |
|
"logits/chosen": 366.0420837402344, |
|
"logits/rejected": 378.7876281738281, |
|
"logps/chosen": -1.073919653892517, |
|
"logps/rejected": -1.589540719985962, |
|
"loss": 1.6684, |
|
"nll_loss": 1.5809502601623535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5369598269462585, |
|
"rewards/margins": 0.2578105330467224, |
|
"rewards/rejected": -0.794770359992981, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.130952380952381, |
|
"grad_norm": 148.60858154296875, |
|
"learning_rate": 4.75e-06, |
|
"log_odds_chosen": 0.875158965587616, |
|
"log_odds_ratio": -0.42892536520957947, |
|
"logits/chosen": 411.90411376953125, |
|
"logits/rejected": 384.31939697265625, |
|
"logps/chosen": -1.1889146566390991, |
|
"logps/rejected": -1.8593413829803467, |
|
"loss": 1.7409, |
|
"nll_loss": 1.6732642650604248, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5944573283195496, |
|
"rewards/margins": 0.3352133333683014, |
|
"rewards/rejected": -0.9296706914901733, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 21.357654571533203, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.9980722665786743, |
|
"log_odds_ratio": -0.40565505623817444, |
|
"logits/chosen": 396.3813171386719, |
|
"logits/rejected": 391.2807312011719, |
|
"logps/chosen": -1.028236985206604, |
|
"logps/rejected": -1.7792075872421265, |
|
"loss": 1.6731, |
|
"nll_loss": 1.3774528503417969, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.514118492603302, |
|
"rewards/margins": 0.37548530101776123, |
|
"rewards/rejected": -0.8896037936210632, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 92.72647857666016, |
|
"learning_rate": 4.8795003647426654e-06, |
|
"log_odds_chosen": 1.2707650661468506, |
|
"log_odds_ratio": -0.3489342927932739, |
|
"logits/chosen": 329.43988037109375, |
|
"logits/rejected": 359.312744140625, |
|
"logps/chosen": -0.9563199877738953, |
|
"logps/rejected": -1.8967196941375732, |
|
"loss": 1.5343, |
|
"nll_loss": 1.4084365367889404, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.47815999388694763, |
|
"rewards/margins": 0.4701998233795166, |
|
"rewards/rejected": -0.9483598470687866, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.3095238095238095, |
|
"grad_norm": 23.097333908081055, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 1.0622062683105469, |
|
"log_odds_ratio": -0.40513938665390015, |
|
"logits/chosen": 364.5774841308594, |
|
"logits/rejected": 378.61834716796875, |
|
"logps/chosen": -0.97075355052948, |
|
"logps/rejected": -1.6918100118637085, |
|
"loss": 1.5379, |
|
"nll_loss": 1.3676984310150146, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.48537677526474, |
|
"rewards/margins": 0.36052826046943665, |
|
"rewards/rejected": -0.8459050059318542, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.369047619047619, |
|
"grad_norm": 29.07915496826172, |
|
"learning_rate": 4.662524041201569e-06, |
|
"log_odds_chosen": 0.8752411603927612, |
|
"log_odds_ratio": -0.4561616778373718, |
|
"logits/chosen": 365.3411560058594, |
|
"logits/rejected": 359.665771484375, |
|
"logps/chosen": -1.142798662185669, |
|
"logps/rejected": -1.8039169311523438, |
|
"loss": 1.4911, |
|
"nll_loss": 1.328161597251892, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5713993310928345, |
|
"rewards/margins": 0.3305591344833374, |
|
"rewards/rejected": -0.9019584655761719, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 25.719493865966797, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 1.2218338251113892, |
|
"log_odds_ratio": -0.38108527660369873, |
|
"logits/chosen": 393.53240966796875, |
|
"logits/rejected": 404.69671630859375, |
|
"logps/chosen": -0.9683561325073242, |
|
"logps/rejected": -1.8645473718643188, |
|
"loss": 1.5413, |
|
"nll_loss": 1.2556110620498657, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4841780662536621, |
|
"rewards/margins": 0.4480956196784973, |
|
"rewards/rejected": -0.9322736859321594, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.4880952380952381, |
|
"grad_norm": 33.33296585083008, |
|
"learning_rate": 4.47213595499958e-06, |
|
"log_odds_chosen": 0.8903130292892456, |
|
"log_odds_ratio": -0.4254421591758728, |
|
"logits/chosen": 376.06280517578125, |
|
"logits/rejected": 374.7559814453125, |
|
"logps/chosen": -1.0784931182861328, |
|
"logps/rejected": -1.7109521627426147, |
|
"loss": 1.5507, |
|
"nll_loss": 1.3247863054275513, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5392465591430664, |
|
"rewards/margins": 0.3162294924259186, |
|
"rewards/rejected": -0.8554760813713074, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.5476190476190477, |
|
"grad_norm": 30.346759796142578, |
|
"learning_rate": 4.385290096535147e-06, |
|
"log_odds_chosen": 1.1133849620819092, |
|
"log_odds_ratio": -0.4015190601348877, |
|
"logits/chosen": 380.5897521972656, |
|
"logits/rejected": 411.142333984375, |
|
"logps/chosen": -1.0903799533843994, |
|
"logps/rejected": -1.9482700824737549, |
|
"loss": 1.5205, |
|
"nll_loss": 1.3193198442459106, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5451899766921997, |
|
"rewards/margins": 0.42894500494003296, |
|
"rewards/rejected": -0.9741350412368774, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 23.75062370300293, |
|
"learning_rate": 4.303314829119352e-06, |
|
"log_odds_chosen": 0.8445339202880859, |
|
"log_odds_ratio": -0.4824402332305908, |
|
"logits/chosen": 383.6838073730469, |
|
"logits/rejected": 385.19677734375, |
|
"logps/chosen": -1.1689434051513672, |
|
"logps/rejected": -1.7730737924575806, |
|
"loss": 1.6065, |
|
"nll_loss": 1.434819221496582, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5844717025756836, |
|
"rewards/margins": 0.3020651936531067, |
|
"rewards/rejected": -0.8865368962287903, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 27.057226181030273, |
|
"learning_rate": 4.2257712736425835e-06, |
|
"log_odds_chosen": 0.6634833216667175, |
|
"log_odds_ratio": -0.5241434574127197, |
|
"logits/chosen": 355.0412902832031, |
|
"logits/rejected": 345.8267517089844, |
|
"logps/chosen": -0.9373375773429871, |
|
"logps/rejected": -1.3723398447036743, |
|
"loss": 1.5457, |
|
"nll_loss": 1.1873310804367065, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.46866878867149353, |
|
"rewards/margins": 0.21750116348266602, |
|
"rewards/rejected": -0.6861699223518372, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.7261904761904763, |
|
"grad_norm": 24.420185089111328, |
|
"learning_rate": 4.1522739926869985e-06, |
|
"log_odds_chosen": 1.1559978723526, |
|
"log_odds_ratio": -0.4130094647407532, |
|
"logits/chosen": 353.3649597167969, |
|
"logits/rejected": 403.1065368652344, |
|
"logps/chosen": -0.9650250673294067, |
|
"logps/rejected": -1.8172032833099365, |
|
"loss": 1.4955, |
|
"nll_loss": 1.2150856256484985, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.48251253366470337, |
|
"rewards/margins": 0.4260891377925873, |
|
"rewards/rejected": -0.9086016416549683, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 19.80653953552246, |
|
"learning_rate": 4.082482904638631e-06, |
|
"log_odds_chosen": 0.7758156657218933, |
|
"log_odds_ratio": -0.4749727249145508, |
|
"logits/chosen": 381.03961181640625, |
|
"logits/rejected": 387.2470703125, |
|
"logps/chosen": -1.0167145729064941, |
|
"logps/rejected": -1.5349647998809814, |
|
"loss": 1.5271, |
|
"nll_loss": 1.3638825416564941, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5083572864532471, |
|
"rewards/margins": 0.25912514328956604, |
|
"rewards/rejected": -0.7674823999404907, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.8452380952380953, |
|
"grad_norm": 16.095684051513672, |
|
"learning_rate": 4.016096644512495e-06, |
|
"log_odds_chosen": 0.729617714881897, |
|
"log_odds_ratio": -0.5468782782554626, |
|
"logits/chosen": 389.2290344238281, |
|
"logits/rejected": 382.7873840332031, |
|
"logps/chosen": -1.1134544610977173, |
|
"logps/rejected": -1.6494137048721313, |
|
"loss": 1.5063, |
|
"nll_loss": 1.3405725955963135, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5567272305488586, |
|
"rewards/margins": 0.26797956228256226, |
|
"rewards/rejected": -0.8247068524360657, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 27.426509857177734, |
|
"learning_rate": 3.952847075210474e-06, |
|
"log_odds_chosen": 0.8723602294921875, |
|
"log_odds_ratio": -0.5240803956985474, |
|
"logits/chosen": 381.7905578613281, |
|
"logits/rejected": 403.63665771484375, |
|
"logps/chosen": -1.0924057960510254, |
|
"logps/rejected": -1.7700122594833374, |
|
"loss": 1.5276, |
|
"nll_loss": 1.3452767133712769, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5462028980255127, |
|
"rewards/margins": 0.3388032019138336, |
|
"rewards/rejected": -0.8850061297416687, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 26.49502944946289, |
|
"learning_rate": 3.892494720807615e-06, |
|
"log_odds_chosen": 1.3228471279144287, |
|
"log_odds_ratio": -0.42377227544784546, |
|
"logits/chosen": 396.58697509765625, |
|
"logits/rejected": 413.573974609375, |
|
"logps/chosen": -1.0189117193222046, |
|
"logps/rejected": -2.0456955432891846, |
|
"loss": 1.4803, |
|
"nll_loss": 1.2141263484954834, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5094558596611023, |
|
"rewards/margins": 0.5133919715881348, |
|
"rewards/rejected": -1.0228477716445923, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_log_odds_chosen": 0.25614500045776367, |
|
"eval_log_odds_ratio": -0.6718475818634033, |
|
"eval_logits/chosen": 328.0206604003906, |
|
"eval_logits/rejected": 274.3525695800781, |
|
"eval_logps/chosen": -1.0924270153045654, |
|
"eval_logps/rejected": -1.3016821146011353, |
|
"eval_loss": 1.7681158781051636, |
|
"eval_nll_loss": 1.4853813648223877, |
|
"eval_rewards/accuracies": 0.5285714268684387, |
|
"eval_rewards/chosen": -0.5462135076522827, |
|
"eval_rewards/margins": 0.10462753474712372, |
|
"eval_rewards/rejected": -0.6508410573005676, |
|
"eval_runtime": 201.7398, |
|
"eval_samples_per_second": 2.741, |
|
"eval_steps_per_second": 0.347, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.0238095238095237, |
|
"grad_norm": 20.41162872314453, |
|
"learning_rate": 3.834824944236852e-06, |
|
"log_odds_chosen": 1.3698937892913818, |
|
"log_odds_ratio": -0.44814401865005493, |
|
"logits/chosen": 386.51275634765625, |
|
"logits/rejected": 390.8349304199219, |
|
"logps/chosen": -1.1337850093841553, |
|
"logps/rejected": -2.192959785461426, |
|
"loss": 1.3382, |
|
"nll_loss": 1.3109896183013916, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5668925046920776, |
|
"rewards/margins": 0.52958744764328, |
|
"rewards/rejected": -1.096479892730713, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 29.53614044189453, |
|
"learning_rate": 3.7796447300922724e-06, |
|
"log_odds_chosen": 3.1386218070983887, |
|
"log_odds_ratio": -0.13156357407569885, |
|
"logits/chosen": 374.08306884765625, |
|
"logits/rejected": 370.32257080078125, |
|
"logps/chosen": -0.6971138119697571, |
|
"logps/rejected": -2.928040027618408, |
|
"loss": 0.963, |
|
"nll_loss": 1.0014644861221313, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.34855690598487854, |
|
"rewards/margins": 1.115463137626648, |
|
"rewards/rejected": -1.464020013809204, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.142857142857143, |
|
"grad_norm": 20.2189884185791, |
|
"learning_rate": 3.72677996249965e-06, |
|
"log_odds_chosen": 2.8878941535949707, |
|
"log_odds_ratio": -0.10618897527456284, |
|
"logits/chosen": 361.66241455078125, |
|
"logits/rejected": 393.54351806640625, |
|
"logps/chosen": -0.572822093963623, |
|
"logps/rejected": -2.587498664855957, |
|
"loss": 0.9084, |
|
"nll_loss": 0.8843202590942383, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2864110469818115, |
|
"rewards/margins": 1.007338285446167, |
|
"rewards/rejected": -1.2937493324279785, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.2023809523809526, |
|
"grad_norm": 15.425383567810059, |
|
"learning_rate": 3.6760731104690393e-06, |
|
"log_odds_chosen": 2.7572951316833496, |
|
"log_odds_ratio": -0.13107402622699738, |
|
"logits/chosen": 328.209228515625, |
|
"logits/rejected": 318.043701171875, |
|
"logps/chosen": -0.5289679765701294, |
|
"logps/rejected": -2.471531629562378, |
|
"loss": 0.9075, |
|
"nll_loss": 0.8783146142959595, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2644839882850647, |
|
"rewards/margins": 0.9712821245193481, |
|
"rewards/rejected": -1.235765814781189, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.261904761904762, |
|
"grad_norm": 17.949966430664062, |
|
"learning_rate": 3.6273812505500587e-06, |
|
"log_odds_chosen": 2.646030902862549, |
|
"log_odds_ratio": -0.1722700595855713, |
|
"logits/chosen": 373.1665954589844, |
|
"logits/rejected": 363.8042297363281, |
|
"logps/chosen": -0.6163553595542908, |
|
"logps/rejected": -2.3227782249450684, |
|
"loss": 0.8853, |
|
"nll_loss": 0.8921284675598145, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.3081776797771454, |
|
"rewards/margins": 0.8532115817070007, |
|
"rewards/rejected": -1.1613891124725342, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.3214285714285716, |
|
"grad_norm": 15.254051208496094, |
|
"learning_rate": 3.5805743701971648e-06, |
|
"log_odds_chosen": 3.064070463180542, |
|
"log_odds_ratio": -0.11556991189718246, |
|
"logits/chosen": 355.3418884277344, |
|
"logits/rejected": 384.1878967285156, |
|
"logps/chosen": -0.5151618123054504, |
|
"logps/rejected": -2.5838632583618164, |
|
"loss": 0.9013, |
|
"nll_loss": 0.8009279370307922, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2575809061527252, |
|
"rewards/margins": 1.0343506336212158, |
|
"rewards/rejected": -1.2919316291809082, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 18.17316246032715, |
|
"learning_rate": 3.5355339059327378e-06, |
|
"log_odds_chosen": 3.8410885334014893, |
|
"log_odds_ratio": -0.07899609953165054, |
|
"logits/chosen": 351.03741455078125, |
|
"logits/rejected": 372.4954528808594, |
|
"logps/chosen": -0.4664763808250427, |
|
"logps/rejected": -3.2068772315979004, |
|
"loss": 0.8742, |
|
"nll_loss": 0.7695188522338867, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.23323819041252136, |
|
"rewards/margins": 1.3702003955841064, |
|
"rewards/rejected": -1.6034386157989502, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.4404761904761907, |
|
"grad_norm": 13.679876327514648, |
|
"learning_rate": 3.4921514788478916e-06, |
|
"log_odds_chosen": 2.8450164794921875, |
|
"log_odds_ratio": -0.16636498272418976, |
|
"logits/chosen": 386.46038818359375, |
|
"logits/rejected": 371.1928405761719, |
|
"logps/chosen": -0.5075671076774597, |
|
"logps/rejected": -2.49824595451355, |
|
"loss": 0.8909, |
|
"nll_loss": 0.7808379530906677, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.25378355383872986, |
|
"rewards/margins": 0.9953393936157227, |
|
"rewards/rejected": -1.249122977256775, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 15.127974510192871, |
|
"learning_rate": 3.450327796711771e-06, |
|
"log_odds_chosen": 3.1379058361053467, |
|
"log_odds_ratio": -0.0948343575000763, |
|
"logits/chosen": 385.96038818359375, |
|
"logits/rejected": 346.40997314453125, |
|
"logps/chosen": -0.46884965896606445, |
|
"logps/rejected": -2.5977988243103027, |
|
"loss": 0.8884, |
|
"nll_loss": 0.9064348340034485, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.23442482948303223, |
|
"rewards/margins": 1.0644747018814087, |
|
"rewards/rejected": -1.2988994121551514, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.5595238095238093, |
|
"grad_norm": 14.862520217895508, |
|
"learning_rate": 3.409971697352368e-06, |
|
"log_odds_chosen": 3.3548312187194824, |
|
"log_odds_ratio": -0.07949019968509674, |
|
"logits/chosen": 366.4288024902344, |
|
"logits/rejected": 379.4535827636719, |
|
"logps/chosen": -0.48801979422569275, |
|
"logps/rejected": -2.8395187854766846, |
|
"loss": 0.8614, |
|
"nll_loss": 0.8571261167526245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24400989711284637, |
|
"rewards/margins": 1.1757495403289795, |
|
"rewards/rejected": -1.4197593927383423, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.619047619047619, |
|
"grad_norm": 14.53584098815918, |
|
"learning_rate": 3.3709993123162106e-06, |
|
"log_odds_chosen": 2.9063503742218018, |
|
"log_odds_ratio": -0.11560215055942535, |
|
"logits/chosen": 353.8319396972656, |
|
"logits/rejected": 356.432373046875, |
|
"logps/chosen": -0.4644307494163513, |
|
"logps/rejected": -2.378437042236328, |
|
"loss": 0.9108, |
|
"nll_loss": 0.7656416893005371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23221537470817566, |
|
"rewards/margins": 0.9570032358169556, |
|
"rewards/rejected": -1.189218521118164, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 17.206817626953125, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"log_odds_chosen": 3.093144178390503, |
|
"log_odds_ratio": -0.10332699865102768, |
|
"logits/chosen": 360.0953063964844, |
|
"logits/rejected": 369.03460693359375, |
|
"logps/chosen": -0.6034930944442749, |
|
"logps/rejected": -2.8071255683898926, |
|
"loss": 0.9202, |
|
"nll_loss": 0.8482205271720886, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.30174654722213745, |
|
"rewards/margins": 1.1018160581588745, |
|
"rewards/rejected": -1.4035627841949463, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.738095238095238, |
|
"grad_norm": 32.90928268432617, |
|
"learning_rate": 3.296902366978936e-06, |
|
"log_odds_chosen": 2.7679336071014404, |
|
"log_odds_ratio": -0.1059746965765953, |
|
"logits/chosen": 395.354736328125, |
|
"logits/rejected": 391.44451904296875, |
|
"logps/chosen": -0.48557600378990173, |
|
"logps/rejected": -2.241939067840576, |
|
"loss": 0.9492, |
|
"nll_loss": 0.863217830657959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24278800189495087, |
|
"rewards/margins": 0.8781815767288208, |
|
"rewards/rejected": -1.120969533920288, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.7976190476190474, |
|
"grad_norm": 19.33934783935547, |
|
"learning_rate": 3.2616403652672114e-06, |
|
"log_odds_chosen": 2.7701828479766846, |
|
"log_odds_ratio": -0.13227275013923645, |
|
"logits/chosen": 384.26873779296875, |
|
"logits/rejected": 360.5152893066406, |
|
"logps/chosen": -0.5336109399795532, |
|
"logps/rejected": -2.403452157974243, |
|
"loss": 0.9262, |
|
"nll_loss": 0.8939388394355774, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2668054699897766, |
|
"rewards/margins": 0.9349204897880554, |
|
"rewards/rejected": -1.2017260789871216, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 20.41938591003418, |
|
"learning_rate": 3.2274861218395142e-06, |
|
"log_odds_chosen": 3.104609251022339, |
|
"log_odds_ratio": -0.09935127198696136, |
|
"logits/chosen": 356.3617248535156, |
|
"logits/rejected": 407.67620849609375, |
|
"logps/chosen": -0.5356149673461914, |
|
"logps/rejected": -2.744302272796631, |
|
"loss": 0.8823, |
|
"nll_loss": 0.7891393899917603, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2678074836730957, |
|
"rewards/margins": 1.1043436527252197, |
|
"rewards/rejected": -1.3721511363983154, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.9166666666666665, |
|
"grad_norm": 28.204269409179688, |
|
"learning_rate": 3.1943828249997e-06, |
|
"log_odds_chosen": 2.208242893218994, |
|
"log_odds_ratio": -0.1713695377111435, |
|
"logits/chosen": 374.45611572265625, |
|
"logits/rejected": 340.09619140625, |
|
"logps/chosen": -0.7715775966644287, |
|
"logps/rejected": -2.332731008529663, |
|
"loss": 0.9365, |
|
"nll_loss": 0.9215513467788696, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.38578879833221436, |
|
"rewards/margins": 0.7805767059326172, |
|
"rewards/rejected": -1.1663655042648315, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.9761904761904763, |
|
"grad_norm": 30.126291275024414, |
|
"learning_rate": 3.1622776601683796e-06, |
|
"log_odds_chosen": 2.9035000801086426, |
|
"log_odds_ratio": -0.11840321123600006, |
|
"logits/chosen": 376.98828125, |
|
"logits/rejected": 380.9736328125, |
|
"logps/chosen": -0.48586076498031616, |
|
"logps/rejected": -2.4321203231811523, |
|
"loss": 0.9109, |
|
"nll_loss": 0.7184505462646484, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.24293038249015808, |
|
"rewards/margins": 0.9731297492980957, |
|
"rewards/rejected": -1.2160601615905762, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_log_odds_chosen": 0.4396049678325653, |
|
"eval_log_odds_ratio": -0.6420542597770691, |
|
"eval_logits/chosen": 301.1214599609375, |
|
"eval_logits/rejected": 240.39068603515625, |
|
"eval_logps/chosen": -1.19857656955719, |
|
"eval_logps/rejected": -1.5204962491989136, |
|
"eval_loss": 1.8577181100845337, |
|
"eval_nll_loss": 1.5532194375991821, |
|
"eval_rewards/accuracies": 0.6142857074737549, |
|
"eval_rewards/chosen": -0.599288284778595, |
|
"eval_rewards/margins": 0.16095994412899017, |
|
"eval_rewards/rejected": -0.7602481245994568, |
|
"eval_runtime": 201.7642, |
|
"eval_samples_per_second": 2.741, |
|
"eval_steps_per_second": 0.347, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 252, |
|
"total_flos": 0.0, |
|
"train_loss": 2.2429193542117165, |
|
"train_runtime": 13126.4564, |
|
"train_samples_per_second": 1.226, |
|
"train_steps_per_second": 0.019 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 252, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|