zephyr-dpo-qlora-uf-5e-6 / eval_results.json
just1nseo's picture
End of training
5631881 verified
raw
history blame contribute delete
724 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -1.7960704565048218,
"eval_logits/rejected": -1.7367748022079468,
"eval_logps/chosen": -574.2586059570312,
"eval_logps/rejected": -669.3330078125,
"eval_loss": 0.48904451727867126,
"eval_rewards/accuracies": 0.7797619104385376,
"eval_rewards/chosen": -2.897712469100952,
"eval_rewards/margins": 1.1741894483566284,
"eval_rewards/margins_max": 3.686448097229004,
"eval_rewards/margins_min": -0.9274057745933533,
"eval_rewards/margins_std": 1.5325316190719604,
"eval_rewards/rejected": -4.071901798248291,
"eval_runtime": 223.8559,
"eval_samples": 2000,
"eval_samples_per_second": 8.934,
"eval_steps_per_second": 0.281
}