|
{ |
|
"epoch": 1.0, |
|
"eval_dpo_losses": 0.6691617965698242, |
|
"eval_logits/chosen": -2.305570602416992, |
|
"eval_logits/rejected": -2.1975905895233154, |
|
"eval_logps/chosen": -262.83721923828125, |
|
"eval_logps/rejected": -254.97503662109375, |
|
"eval_loss": 0.6798878312110901, |
|
"eval_positive_losses": 0.08152038604021072, |
|
"eval_rewards/accuracies": 0.7123016119003296, |
|
"eval_rewards/chosen": 0.12936948239803314, |
|
"eval_rewards/margins": 0.05091732367873192, |
|
"eval_rewards/margins_max": 0.19700397551059723, |
|
"eval_rewards/margins_min": -0.07996664941310883, |
|
"eval_rewards/margins_std": 0.09219963103532791, |
|
"eval_rewards/rejected": 0.07845214754343033, |
|
"eval_runtime": 387.9178, |
|
"eval_samples": 2000, |
|
"eval_samples_per_second": 5.156, |
|
"eval_steps_per_second": 0.162 |
|
} |