{ "epoch": 1.0, "eval_logits/chosen": 4.460368633270264, "eval_logits/rejected": 4.594798564910889, "eval_logps/chosen": -380.465087890625, "eval_logps/rejected": -579.1896362304688, "eval_loss": 0.4399436116218567, "eval_pred_label": 2212.28125, "eval_rewards/accuracies": 0.33984375, "eval_rewards/chosen": -3.1655280590057373, "eval_rewards/margins": 1.8544387817382812, "eval_rewards/rejected": -5.019967079162598, "eval_runtime": 125.2743, "eval_samples": 2000, "eval_samples_per_second": 15.965, "eval_steps_per_second": 0.255, "eval_use_label": 6575.71875 }