hibana2077's picture
Upload folder using huggingface_hub
5081dc9
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 876,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.7045454545454546e-05,
"logits/chosen": -2.9543049335479736,
"logits/rejected": -4.587946891784668,
"logps/chosen": -4854.5478515625,
"logps/rejected": -32.31528854370117,
"loss": 0.3787,
"rewards/accuracies": 0.8017241358757019,
"rewards/chosen": 4.192387104034424,
"rewards/margins": 4.192722797393799,
"rewards/rejected": -0.0003354697546456009,
"step": 58
},
{
"epoch": 0.4,
"learning_rate": 2.9238578680203047e-05,
"logits/chosen": -3.0135436058044434,
"logits/rejected": -4.595321178436279,
"logps/chosen": -5432.6845703125,
"logps/rejected": -32.69685363769531,
"loss": 0.0201,
"rewards/accuracies": 1.0,
"rewards/chosen": 29.855911254882812,
"rewards/margins": 29.8657283782959,
"rewards/rejected": -0.009818021208047867,
"step": 116
},
{
"epoch": 0.6,
"learning_rate": 2.7030456852791878e-05,
"logits/chosen": -2.929417133331299,
"logits/rejected": -4.776305198669434,
"logps/chosen": -4491.6943359375,
"logps/rejected": -32.90016174316406,
"loss": 0.0064,
"rewards/accuracies": 1.0,
"rewards/chosen": 42.8309440612793,
"rewards/margins": 42.856590270996094,
"rewards/rejected": -0.02564803883433342,
"step": 174
},
{
"epoch": 0.79,
"learning_rate": 2.4822335025380712e-05,
"logits/chosen": -2.8669936656951904,
"logits/rejected": -4.86910343170166,
"logps/chosen": -4899.3330078125,
"logps/rejected": -32.84577560424805,
"loss": 0.0013,
"rewards/accuracies": 1.0,
"rewards/chosen": 56.820987701416016,
"rewards/margins": 56.85633850097656,
"rewards/rejected": -0.035354480147361755,
"step": 232
},
{
"epoch": 0.99,
"learning_rate": 2.2614213197969543e-05,
"logits/chosen": -3.1831815242767334,
"logits/rejected": -4.907491207122803,
"logps/chosen": -5208.95263671875,
"logps/rejected": -32.94639587402344,
"loss": 0.006,
"rewards/accuracies": 1.0,
"rewards/chosen": 65.99440002441406,
"rewards/margins": 66.03594970703125,
"rewards/rejected": -0.04156281799077988,
"step": 290
},
{
"epoch": 1.19,
"learning_rate": 2.0406091370558378e-05,
"logits/chosen": -2.9403574466705322,
"logits/rejected": -4.912071704864502,
"logps/chosen": -4351.31201171875,
"logps/rejected": -33.200740814208984,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/chosen": 59.315834045410156,
"rewards/margins": 59.37520217895508,
"rewards/rejected": -0.05936765670776367,
"step": 348
},
{
"epoch": 1.39,
"learning_rate": 1.819796954314721e-05,
"logits/chosen": -2.886042594909668,
"logits/rejected": -4.924810409545898,
"logps/chosen": -4893.87060546875,
"logps/rejected": -33.106529235839844,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 72.36699676513672,
"rewards/margins": 72.43253326416016,
"rewards/rejected": -0.06554649025201797,
"step": 406
},
{
"epoch": 1.59,
"learning_rate": 1.5989847715736043e-05,
"logits/chosen": -2.999485492706299,
"logits/rejected": -4.834668159484863,
"logps/chosen": -5137.2041015625,
"logps/rejected": -33.30693817138672,
"loss": 0.001,
"rewards/accuracies": 1.0,
"rewards/chosen": 78.8132553100586,
"rewards/margins": 78.88518524169922,
"rewards/rejected": -0.07193376123905182,
"step": 464
},
{
"epoch": 1.79,
"learning_rate": 1.3781725888324872e-05,
"logits/chosen": -2.787987470626831,
"logits/rejected": -4.926151275634766,
"logps/chosen": -4610.828125,
"logps/rejected": -33.529579162597656,
"loss": 0.004,
"rewards/accuracies": 1.0,
"rewards/chosen": 73.21186065673828,
"rewards/margins": 73.2912826538086,
"rewards/rejected": -0.0794229581952095,
"step": 522
},
{
"epoch": 1.99,
"learning_rate": 1.1573604060913705e-05,
"logits/chosen": -2.958709955215454,
"logits/rejected": -4.893362045288086,
"logps/chosen": -4559.85302734375,
"logps/rejected": -33.20284652709961,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/chosen": 76.01142883300781,
"rewards/margins": 76.10553741455078,
"rewards/rejected": -0.09409420937299728,
"step": 580
},
{
"epoch": 2.18,
"learning_rate": 9.365482233502538e-06,
"logits/chosen": -2.9004671573638916,
"logits/rejected": -4.998195648193359,
"logps/chosen": -4775.02197265625,
"logps/rejected": -33.74284744262695,
"loss": 0.0036,
"rewards/accuracies": 1.0,
"rewards/chosen": 79.64250946044922,
"rewards/margins": 79.75552368164062,
"rewards/rejected": -0.11301343142986298,
"step": 638
},
{
"epoch": 2.38,
"learning_rate": 7.15736040609137e-06,
"logits/chosen": -2.8723161220550537,
"logits/rejected": -4.972283363342285,
"logps/chosen": -4627.41357421875,
"logps/rejected": -33.839881896972656,
"loss": 0.0054,
"rewards/accuracies": 1.0,
"rewards/chosen": 80.74669647216797,
"rewards/margins": 80.86132049560547,
"rewards/rejected": -0.11461903154850006,
"step": 696
},
{
"epoch": 2.58,
"learning_rate": 4.949238578680203e-06,
"logits/chosen": -2.920714855194092,
"logits/rejected": -4.981288433074951,
"logps/chosen": -4183.37890625,
"logps/rejected": -33.559993743896484,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 75.54529571533203,
"rewards/margins": 75.672607421875,
"rewards/rejected": -0.127317875623703,
"step": 754
},
{
"epoch": 2.78,
"learning_rate": 2.7411167512690357e-06,
"logits/chosen": -3.071594715118408,
"logits/rejected": -5.01361608505249,
"logps/chosen": -4776.15234375,
"logps/rejected": -34.06298065185547,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 84.52633666992188,
"rewards/margins": 84.65997314453125,
"rewards/rejected": -0.13363485038280487,
"step": 812
},
{
"epoch": 2.98,
"learning_rate": 5.329949238578681e-07,
"logits/chosen": -2.9292426109313965,
"logits/rejected": -4.913326740264893,
"logps/chosen": -4833.65625,
"logps/rejected": -33.5969123840332,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 87.57024383544922,
"rewards/margins": 87.6995620727539,
"rewards/rejected": -0.12932546436786652,
"step": 870
}
],
"logging_steps": 58,
"max_steps": 876,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}