{
  "best_metric": 0.6417509913444519,
  "best_model_checkpoint": "./Zephyr/14-03-24-Weni-WeniGPT-2.4.1-Zephyr-7B-5-epochs-LLM_Base_2.0.3_DPO_WeniGPT DPO training-2_max_steps-445_batch_32_2024-03-14_ppid_9/checkpoint-100",
  "epoch": 1.1173184357541899,
  "eval_steps": 100,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.22,
      "grad_norm": 1.764265537261963,
      "learning_rate": 2.2222222222222223e-05,
      "logits/chosen": -2.2127978801727295,
      "logits/rejected": -2.2138168811798096,
      "logps/chosen": -6.020094871520996,
      "logps/rejected": -10.437583923339844,
      "loss": 0.6881,
      "rewards/accuracies": 0.05781250074505806,
      "rewards/chosen": 0.008710675872862339,
      "rewards/margins": 0.011647692881524563,
      "rewards/rejected": -0.002937017474323511,
      "step": 20
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.008939280174672604,
      "learning_rate": 4.4444444444444447e-05,
      "logits/chosen": -2.209024429321289,
      "logits/rejected": -2.209834575653076,
      "logps/chosen": -4.6792497634887695,
      "logps/rejected": -9.106904983520508,
      "loss": 0.6701,
      "rewards/accuracies": 0.04531250149011612,
      "rewards/chosen": 0.09174498170614243,
      "rewards/margins": 0.13157977163791656,
      "rewards/rejected": -0.039834775030612946,
      "step": 40
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.14188189570541e-07,
      "learning_rate": 4.8125000000000004e-05,
      "logits/chosen": -2.140080213546753,
      "logits/rejected": -2.140465259552002,
      "logps/chosen": -5.5485029220581055,
      "logps/rejected": -12.926416397094727,
      "loss": 0.6585,
      "rewards/accuracies": 0.05156249925494194,
      "rewards/chosen": -0.03698154538869858,
      "rewards/margins": 0.39537638425827026,
      "rewards/rejected": -0.43235793709754944,
      "step": 60
    },
    {
      "epoch": 0.89,
      "grad_norm": 16.8149471282959,
      "learning_rate": 4.575e-05,
      "logits/chosen": -2.026047945022583,
      "logits/rejected": -2.025956392288208,
      "logps/chosen": -10.022688865661621,
      "logps/rejected": -24.660259246826172,
      "loss": 0.6669,
      "rewards/accuracies": 0.05781250074505806,
      "rewards/chosen": -0.3339082598686218,
      "rewards/margins": 1.0433582067489624,
      "rewards/rejected": -1.377266526222229,
      "step": 80
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.0,
      "learning_rate": 4.325e-05,
      "logits/chosen": -1.9624429941177368,
      "logits/rejected": -1.9612023830413818,
      "logps/chosen": -10.355883598327637,
      "logps/rejected": -26.145648956298828,
      "loss": 0.6643,
      "rewards/accuracies": 0.05312500149011612,
      "rewards/chosen": -0.4869672358036041,
      "rewards/margins": 1.212210774421692,
      "rewards/rejected": -1.6991779804229736,
      "step": 100
    },
    {
      "epoch": 1.12,
      "eval_logits/chosen": -1.8881776332855225,
      "eval_logits/rejected": -1.8856115341186523,
      "eval_logps/chosen": -17.27315902709961,
      "eval_logps/rejected": -45.2144889831543,
      "eval_loss": 0.6417509913444519,
      "eval_rewards/accuracies": 0.07604166865348816,
      "eval_rewards/chosen": -0.8916252851486206,
      "eval_rewards/margins": 2.1516268253326416,
      "eval_rewards/rejected": -3.0432522296905518,
      "eval_runtime": 183.8919,
      "eval_samples_per_second": 1.729,
      "eval_steps_per_second": 0.218,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 445,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}