{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 876,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 1.7045454545454546e-05,
      "logits/chosen": -2.9543049335479736,
      "logits/rejected": -4.587946891784668,
      "logps/chosen": -4854.5478515625,
      "logps/rejected": -32.31528854370117,
      "loss": 0.3787,
      "rewards/accuracies": 0.8017241358757019,
      "rewards/chosen": 4.192387104034424,
      "rewards/margins": 4.192722797393799,
      "rewards/rejected": -0.0003354697546456009,
      "step": 58
    },
    {
      "epoch": 0.4,
      "learning_rate": 2.9238578680203047e-05,
      "logits/chosen": -3.0135436058044434,
      "logits/rejected": -4.595321178436279,
      "logps/chosen": -5432.6845703125,
      "logps/rejected": -32.69685363769531,
      "loss": 0.0201,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 29.855911254882812,
      "rewards/margins": 29.8657283782959,
      "rewards/rejected": -0.009818021208047867,
      "step": 116
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.7030456852791878e-05,
      "logits/chosen": -2.929417133331299,
      "logits/rejected": -4.776305198669434,
      "logps/chosen": -4491.6943359375,
      "logps/rejected": -32.90016174316406,
      "loss": 0.0064,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 42.8309440612793,
      "rewards/margins": 42.856590270996094,
      "rewards/rejected": -0.02564803883433342,
      "step": 174
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.4822335025380712e-05,
      "logits/chosen": -2.8669936656951904,
      "logits/rejected": -4.86910343170166,
      "logps/chosen": -4899.3330078125,
      "logps/rejected": -32.84577560424805,
      "loss": 0.0013,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 56.820987701416016,
      "rewards/margins": 56.85633850097656,
      "rewards/rejected": -0.035354480147361755,
      "step": 232
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.2614213197969543e-05,
      "logits/chosen": -3.1831815242767334,
      "logits/rejected": -4.907491207122803,
      "logps/chosen": -5208.95263671875,
      "logps/rejected": -32.94639587402344,
      "loss": 0.006,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 65.99440002441406,
      "rewards/margins": 66.03594970703125,
      "rewards/rejected": -0.04156281799077988,
      "step": 290
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.0406091370558378e-05,
      "logits/chosen": -2.9403574466705322,
      "logits/rejected": -4.912071704864502,
      "logps/chosen": -4351.31201171875,
      "logps/rejected": -33.200740814208984,
      "loss": 0.0012,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 59.315834045410156,
      "rewards/margins": 59.37520217895508,
      "rewards/rejected": -0.05936765670776367,
      "step": 348
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.819796954314721e-05,
      "logits/chosen": -2.886042594909668,
      "logits/rejected": -4.924810409545898,
      "logps/chosen": -4893.87060546875,
      "logps/rejected": -33.106529235839844,
      "loss": 0.0003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 72.36699676513672,
      "rewards/margins": 72.43253326416016,
      "rewards/rejected": -0.06554649025201797,
      "step": 406
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.5989847715736043e-05,
      "logits/chosen": -2.999485492706299,
      "logits/rejected": -4.834668159484863,
      "logps/chosen": -5137.2041015625,
      "logps/rejected": -33.30693817138672,
      "loss": 0.001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 78.8132553100586,
      "rewards/margins": 78.88518524169922,
      "rewards/rejected": -0.07193376123905182,
      "step": 464
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.3781725888324872e-05,
      "logits/chosen": -2.787987470626831,
      "logits/rejected": -4.926151275634766,
      "logps/chosen": -4610.828125,
      "logps/rejected": -33.529579162597656,
      "loss": 0.004,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 73.21186065673828,
      "rewards/margins": 73.2912826538086,
      "rewards/rejected": -0.0794229581952095,
      "step": 522
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.1573604060913705e-05,
      "logits/chosen": -2.958709955215454,
      "logits/rejected": -4.893362045288086,
      "logps/chosen": -4559.85302734375,
      "logps/rejected": -33.20284652709961,
      "loss": 0.0066,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 76.01142883300781,
      "rewards/margins": 76.10553741455078,
      "rewards/rejected": -0.09409420937299728,
      "step": 580
    },
    {
      "epoch": 2.18,
      "learning_rate": 9.365482233502538e-06,
      "logits/chosen": -2.9004671573638916,
      "logits/rejected": -4.998195648193359,
      "logps/chosen": -4775.02197265625,
      "logps/rejected": -33.74284744262695,
      "loss": 0.0036,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 79.64250946044922,
      "rewards/margins": 79.75552368164062,
      "rewards/rejected": -0.11301343142986298,
      "step": 638
    },
    {
      "epoch": 2.38,
      "learning_rate": 7.15736040609137e-06,
      "logits/chosen": -2.8723161220550537,
      "logits/rejected": -4.972283363342285,
      "logps/chosen": -4627.41357421875,
      "logps/rejected": -33.839881896972656,
      "loss": 0.0054,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 80.74669647216797,
      "rewards/margins": 80.86132049560547,
      "rewards/rejected": -0.11461903154850006,
      "step": 696
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.949238578680203e-06,
      "logits/chosen": -2.920714855194092,
      "logits/rejected": -4.981288433074951,
      "logps/chosen": -4183.37890625,
      "logps/rejected": -33.559993743896484,
      "loss": 0.0004,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 75.54529571533203,
      "rewards/margins": 75.672607421875,
      "rewards/rejected": -0.127317875623703,
      "step": 754
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.7411167512690357e-06,
      "logits/chosen": -3.071594715118408,
      "logits/rejected": -5.01361608505249,
      "logps/chosen": -4776.15234375,
      "logps/rejected": -34.06298065185547,
      "loss": 0.0003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 84.52633666992188,
      "rewards/margins": 84.65997314453125,
      "rewards/rejected": -0.13363485038280487,
      "step": 812
    },
    {
      "epoch": 2.98,
      "learning_rate": 5.329949238578681e-07,
      "logits/chosen": -2.9292426109313965,
      "logits/rejected": -4.913326740264893,
      "logps/chosen": -4833.65625,
      "logps/rejected": -33.5969123840332,
      "loss": 0.0001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 87.57024383544922,
      "rewards/margins": 87.6995620727539,
      "rewards/rejected": -0.12932546436786652,
      "step": 870
    }
  ],
  "logging_steps": 58,
  "max_steps": 876,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}