{ "epoch": 1.0, "eval_logps/chosen": -2.755657434463501, "eval_logps/rejected": -1.0496879816055298, "eval_loss": 21.31629180908203, "eval_rewards/accuracies": 0.0, "eval_rewards/chosen": -27.55657386779785, "eval_rewards/margins": -17.059694290161133, "eval_rewards/rejected": -10.496879577636719, "eval_runtime": 4.3483, "eval_samples": 12, "eval_samples_per_second": 2.76, "eval_steps_per_second": 0.69, "total_flos": 0.0, "train_loss": 22.58866818745931, "train_runtime": 4347.6828, "train_samples": 5640, "train_samples_per_second": 1.297, "train_steps_per_second": 0.13 }