{ "epoch": 3.0, "eval_logits/chosen": -2.198760986328125, "eval_logits/rejected": -2.1614327430725098, "eval_logps/chosen": -278.5389709472656, "eval_logps/rejected": -310.5012512207031, "eval_loss": 0.6269853711128235, "eval_rewards/accuracies": 0.6924999952316284, "eval_rewards/chosen": -7.6611433029174805, "eval_rewards/margins": 4.435902118682861, "eval_rewards/rejected": -12.0970458984375, "eval_runtime": 132.6261, "eval_samples": 3156, "eval_samples_per_second": 23.796, "eval_steps_per_second": 0.377, "train_loss": 0.24049862180692672, "train_runtime": 20948.1804, "train_samples": 82424, "train_samples_per_second": 9.519, "train_steps_per_second": 0.595 }