{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 876, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 1.7045454545454546e-05, "logits/chosen": -2.9543049335479736, "logits/rejected": -4.587946891784668, "logps/chosen": -4854.5478515625, "logps/rejected": -32.31528854370117, "loss": 0.3787, "rewards/accuracies": 0.8017241358757019, "rewards/chosen": 4.192387104034424, "rewards/margins": 4.192722797393799, "rewards/rejected": -0.0003354697546456009, "step": 58 }, { "epoch": 0.4, "learning_rate": 2.9238578680203047e-05, "logits/chosen": -3.0135436058044434, "logits/rejected": -4.595321178436279, "logps/chosen": -5432.6845703125, "logps/rejected": -32.69685363769531, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/chosen": 29.855911254882812, "rewards/margins": 29.8657283782959, "rewards/rejected": -0.009818021208047867, "step": 116 }, { "epoch": 0.6, "learning_rate": 2.7030456852791878e-05, "logits/chosen": -2.929417133331299, "logits/rejected": -4.776305198669434, "logps/chosen": -4491.6943359375, "logps/rejected": -32.90016174316406, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": 42.8309440612793, "rewards/margins": 42.856590270996094, "rewards/rejected": -0.02564803883433342, "step": 174 }, { "epoch": 0.79, "learning_rate": 2.4822335025380712e-05, "logits/chosen": -2.8669936656951904, "logits/rejected": -4.86910343170166, "logps/chosen": -4899.3330078125, "logps/rejected": -32.84577560424805, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": 56.820987701416016, "rewards/margins": 56.85633850097656, "rewards/rejected": -0.035354480147361755, "step": 232 }, { "epoch": 0.99, "learning_rate": 2.2614213197969543e-05, "logits/chosen": -3.1831815242767334, "logits/rejected": -4.907491207122803, "logps/chosen": -5208.95263671875, "logps/rejected": -32.94639587402344, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": 65.99440002441406, "rewards/margins": 66.03594970703125, "rewards/rejected": -0.04156281799077988, "step": 290 }, { "epoch": 1.19, "learning_rate": 2.0406091370558378e-05, "logits/chosen": -2.9403574466705322, "logits/rejected": -4.912071704864502, "logps/chosen": -4351.31201171875, "logps/rejected": -33.200740814208984, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": 59.315834045410156, "rewards/margins": 59.37520217895508, "rewards/rejected": -0.05936765670776367, "step": 348 }, { "epoch": 1.39, "learning_rate": 1.819796954314721e-05, "logits/chosen": -2.886042594909668, "logits/rejected": -4.924810409545898, "logps/chosen": -4893.87060546875, "logps/rejected": -33.106529235839844, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 72.36699676513672, "rewards/margins": 72.43253326416016, "rewards/rejected": -0.06554649025201797, "step": 406 }, { "epoch": 1.59, "learning_rate": 1.5989847715736043e-05, "logits/chosen": -2.999485492706299, "logits/rejected": -4.834668159484863, "logps/chosen": -5137.2041015625, "logps/rejected": -33.30693817138672, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/chosen": 78.8132553100586, "rewards/margins": 78.88518524169922, "rewards/rejected": -0.07193376123905182, "step": 464 }, { "epoch": 1.79, "learning_rate": 1.3781725888324872e-05, "logits/chosen": -2.787987470626831, "logits/rejected": -4.926151275634766, "logps/chosen": -4610.828125, "logps/rejected": -33.529579162597656, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": 73.21186065673828, "rewards/margins": 73.2912826538086, "rewards/rejected": -0.0794229581952095, "step": 522 }, { "epoch": 1.99, "learning_rate": 1.1573604060913705e-05, "logits/chosen": -2.958709955215454, "logits/rejected": -4.893362045288086, "logps/chosen": -4559.85302734375, "logps/rejected": -33.20284652709961, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": 76.01142883300781, "rewards/margins": 76.10553741455078, "rewards/rejected": -0.09409420937299728, "step": 580 }, { "epoch": 2.18, "learning_rate": 9.365482233502538e-06, "logits/chosen": -2.9004671573638916, "logits/rejected": -4.998195648193359, "logps/chosen": -4775.02197265625, "logps/rejected": -33.74284744262695, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": 79.64250946044922, "rewards/margins": 79.75552368164062, "rewards/rejected": -0.11301343142986298, "step": 638 }, { "epoch": 2.38, "learning_rate": 7.15736040609137e-06, "logits/chosen": -2.8723161220550537, "logits/rejected": -4.972283363342285, "logps/chosen": -4627.41357421875, "logps/rejected": -33.839881896972656, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": 80.74669647216797, "rewards/margins": 80.86132049560547, "rewards/rejected": -0.11461903154850006, "step": 696 }, { "epoch": 2.58, "learning_rate": 4.949238578680203e-06, "logits/chosen": -2.920714855194092, "logits/rejected": -4.981288433074951, "logps/chosen": -4183.37890625, "logps/rejected": -33.559993743896484, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": 75.54529571533203, "rewards/margins": 75.672607421875, "rewards/rejected": -0.127317875623703, "step": 754 }, { "epoch": 2.78, "learning_rate": 2.7411167512690357e-06, "logits/chosen": -3.071594715118408, "logits/rejected": -5.01361608505249, "logps/chosen": -4776.15234375, "logps/rejected": -34.06298065185547, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": 84.52633666992188, "rewards/margins": 84.65997314453125, "rewards/rejected": -0.13363485038280487, "step": 812 }, { "epoch": 2.98, "learning_rate": 5.329949238578681e-07, "logits/chosen": -2.9292426109313965, "logits/rejected": -4.913326740264893, "logps/chosen": -4833.65625, "logps/rejected": -33.5969123840332, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/chosen": 87.57024383544922, "rewards/margins": 87.6995620727539, "rewards/rejected": -0.12932546436786652, "step": 870 } ], "logging_steps": 58, "max_steps": 876, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }