beamaia's picture
Upload folder using huggingface_hub
9d5fc71 verified
{
"best_metric": 0.6417509913444519,
"best_model_checkpoint": "./Zephyr/14-03-24-Weni-WeniGPT-2.4.1-Zephyr-7B-5-epochs-LLM_Base_2.0.3_DPO_WeniGPT DPO training-2_max_steps-445_batch_32_2024-03-14_ppid_9/checkpoint-100",
"epoch": 1.1173184357541899,
"eval_steps": 100,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22,
"grad_norm": 1.764265537261963,
"learning_rate": 2.2222222222222223e-05,
"logits/chosen": -2.2127978801727295,
"logits/rejected": -2.2138168811798096,
"logps/chosen": -6.020094871520996,
"logps/rejected": -10.437583923339844,
"loss": 0.6881,
"rewards/accuracies": 0.05781250074505806,
"rewards/chosen": 0.008710675872862339,
"rewards/margins": 0.011647692881524563,
"rewards/rejected": -0.002937017474323511,
"step": 20
},
{
"epoch": 0.45,
"grad_norm": 0.008939280174672604,
"learning_rate": 4.4444444444444447e-05,
"logits/chosen": -2.209024429321289,
"logits/rejected": -2.209834575653076,
"logps/chosen": -4.6792497634887695,
"logps/rejected": -9.106904983520508,
"loss": 0.6701,
"rewards/accuracies": 0.04531250149011612,
"rewards/chosen": 0.09174498170614243,
"rewards/margins": 0.13157977163791656,
"rewards/rejected": -0.039834775030612946,
"step": 40
},
{
"epoch": 0.67,
"grad_norm": 1.14188189570541e-07,
"learning_rate": 4.8125000000000004e-05,
"logits/chosen": -2.140080213546753,
"logits/rejected": -2.140465259552002,
"logps/chosen": -5.5485029220581055,
"logps/rejected": -12.926416397094727,
"loss": 0.6585,
"rewards/accuracies": 0.05156249925494194,
"rewards/chosen": -0.03698154538869858,
"rewards/margins": 0.39537638425827026,
"rewards/rejected": -0.43235793709754944,
"step": 60
},
{
"epoch": 0.89,
"grad_norm": 16.8149471282959,
"learning_rate": 4.575e-05,
"logits/chosen": -2.026047945022583,
"logits/rejected": -2.025956392288208,
"logps/chosen": -10.022688865661621,
"logps/rejected": -24.660259246826172,
"loss": 0.6669,
"rewards/accuracies": 0.05781250074505806,
"rewards/chosen": -0.3339082598686218,
"rewards/margins": 1.0433582067489624,
"rewards/rejected": -1.377266526222229,
"step": 80
},
{
"epoch": 1.12,
"grad_norm": 0.0,
"learning_rate": 4.325e-05,
"logits/chosen": -1.9624429941177368,
"logits/rejected": -1.9612023830413818,
"logps/chosen": -10.355883598327637,
"logps/rejected": -26.145648956298828,
"loss": 0.6643,
"rewards/accuracies": 0.05312500149011612,
"rewards/chosen": -0.4869672358036041,
"rewards/margins": 1.212210774421692,
"rewards/rejected": -1.6991779804229736,
"step": 100
},
{
"epoch": 1.12,
"eval_logits/chosen": -1.8881776332855225,
"eval_logits/rejected": -1.8856115341186523,
"eval_logps/chosen": -17.27315902709961,
"eval_logps/rejected": -45.2144889831543,
"eval_loss": 0.6417509913444519,
"eval_rewards/accuracies": 0.07604166865348816,
"eval_rewards/chosen": -0.8916252851486206,
"eval_rewards/margins": 2.1516268253326416,
"eval_rewards/rejected": -3.0432522296905518,
"eval_runtime": 183.8919,
"eval_samples_per_second": 1.729,
"eval_steps_per_second": 0.218,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 445,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}