llama-3-orpo-qlora / eval_results.json
dchoi44's picture
End of training
ada2921 verified
raw
history blame
725 Bytes
{
"epoch": 4.999237456153729,
"eval_log_odds_chosen": 2.101734161376953,
"eval_log_odds_ratio": -0.39467301964759827,
"eval_logits/chosen": -0.9354549646377563,
"eval_logits/rejected": -1.034712791442871,
"eval_logps/chosen": -0.8229629993438721,
"eval_logps/rejected": -2.495842695236206,
"eval_loss": 1.0581330060958862,
"eval_nll_loss": 1.0625019073486328,
"eval_rewards/accuracies": 0.7878788113594055,
"eval_rewards/chosen": -0.08229630440473557,
"eval_rewards/margins": 0.16728799045085907,
"eval_rewards/rejected": -0.24958431720733643,
"eval_runtime": 394.4103,
"eval_samples": 393,
"eval_samples_per_second": 0.996,
"eval_steps_per_second": 0.167
}