{ "epoch": 4.999237456153729, "eval_log_odds_chosen": 2.101734161376953, "eval_log_odds_ratio": -0.39467301964759827, "eval_logits/chosen": -0.9354549646377563, "eval_logits/rejected": -1.034712791442871, "eval_logps/chosen": -0.8229629993438721, "eval_logps/rejected": -2.495842695236206, "eval_loss": 1.0581330060958862, "eval_nll_loss": 1.0625019073486328, "eval_rewards/accuracies": 0.7878788113594055, "eval_rewards/chosen": -0.08229630440473557, "eval_rewards/margins": 0.16728799045085907, "eval_rewards/rejected": -0.24958431720733643, "eval_runtime": 394.4103, "eval_samples": 393, "eval_samples_per_second": 0.996, "eval_steps_per_second": 0.167 }