{ "epoch": 2.9904761904761905, "eval_log_odds_chosen": 0.367882639169693, "eval_log_odds_ratio": -0.6760825514793396, "eval_logits/chosen": 299.86248779296875, "eval_logits/rejected": 270.96282958984375, "eval_logps/chosen": -1.1505995988845825, "eval_logps/rejected": -1.3976730108261108, "eval_loss": 1.59840989112854, "eval_nll_loss": 1.5312451124191284, "eval_rewards/accuracies": 0.5899280309677124, "eval_rewards/chosen": -0.057529982179403305, "eval_rewards/margins": 0.012353661470115185, "eval_rewards/rejected": -0.06988365203142166, "eval_runtime": 112.534, "eval_samples": 553, "eval_samples_per_second": 4.914, "eval_steps_per_second": 1.235 }