{ "epoch": 1.0, "eval_logits/chosen": -1.7960704565048218, "eval_logits/rejected": -1.7367748022079468, "eval_logps/chosen": -574.2586059570312, "eval_logps/rejected": -669.3330078125, "eval_loss": 0.48904451727867126, "eval_rewards/accuracies": 0.7797619104385376, "eval_rewards/chosen": -2.897712469100952, "eval_rewards/margins": 1.1741894483566284, "eval_rewards/margins_max": 3.686448097229004, "eval_rewards/margins_min": -0.9274057745933533, "eval_rewards/margins_std": 1.5325316190719604, "eval_rewards/rejected": -4.071901798248291, "eval_runtime": 223.8559, "eval_samples": 2000, "eval_samples_per_second": 8.934, "eval_steps_per_second": 0.281 }