{
  "best_metric": 0.4733360707759857,
  "best_model_checkpoint": "./mixstral/05-04-24-Weni-WeniGPT-Agents-Mixstral-Instruct-2.0.1-KTO_KTO with Agents 1.2.0 dataset and Mixstral model, with tokenization zephyr chat template-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100",
  "epoch": 0.684931506849315,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "grad_norm": Infinity,
      "kl": 16.759851455688477,
      "learning_rate": 0.00018,
      "logps/chosen": -217.2803497314453,
      "logps/rejected": -266.78955078125,
      "loss": 0.4135,
      "rewards/chosen": 2.8302645683288574,
      "rewards/margins": 1.9116979837417603,
      "rewards/rejected": 1.01555597782135,
      "step": 20
    },
    {
      "epoch": 0.27,
      "grad_norm": 4.954351902008057,
      "kl": 0.40393954515457153,
      "learning_rate": 0.00015142857142857143,
      "logps/chosen": -323.0667419433594,
      "logps/rejected": -331.9349365234375,
      "loss": 0.427,
      "rewards/chosen": -5.341065883636475,
      "rewards/margins": 2.56813383102417,
      "rewards/rejected": -8.01456356048584,
      "step": 40
    },
    {
      "epoch": 0.34,
      "eval_kl": 0.31787964701652527,
      "eval_logps/chosen": -276.96771240234375,
      "eval_logps/rejected": -285.83453369140625,
      "eval_loss": 0.4442897439002991,
      "eval_rewards/chosen": -2.62730073928833,
      "eval_rewards/margins": 1.0439932346343994,
      "eval_rewards/rejected": -3.7775051593780518,
      "eval_runtime": 357.2813,
      "eval_samples_per_second": 0.84,
      "eval_steps_per_second": 0.21,
      "step": 50
    },
    {
      "epoch": 0.41,
      "grad_norm": 1.9756174879148602e-05,
      "kl": 0.9693483114242554,
      "learning_rate": 0.00012571428571428572,
      "logps/chosen": -549.1575927734375,
      "logps/rejected": -583.3649291992188,
      "loss": 0.4167,
      "rewards/chosen": -29.825927734375,
      "rewards/margins": 3.2253201007843018,
      "rewards/rejected": -32.24267578125,
      "step": 60
    },
    {
      "epoch": 0.55,
      "grad_norm": 1.4999230870671454e-06,
      "kl": 0.0,
      "learning_rate": 9.714285714285715e-05,
      "logps/chosen": -1577.5938720703125,
      "logps/rejected": -1497.0914306640625,
      "loss": 0.4625,
      "rewards/chosen": -132.58737182617188,
      "rewards/margins": -8.486028671264648,
      "rewards/rejected": -123.95598602294922,
      "step": 80
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.0,
      "kl": 0.0,
      "learning_rate": 6.857142857142858e-05,
      "logps/chosen": -1709.744140625,
      "logps/rejected": -1653.6820068359375,
      "loss": 0.425,
      "rewards/chosen": -144.38365173339844,
      "rewards/margins": -5.729578971862793,
      "rewards/rejected": -138.68692016601562,
      "step": 100
    },
    {
      "epoch": 0.68,
      "eval_kl": 0.0,
      "eval_logps/chosen": -1588.1917724609375,
      "eval_logps/rejected": -1473.307373046875,
      "eval_loss": 0.4733360707759857,
      "eval_rewards/chosen": -133.74969482421875,
      "eval_rewards/margins": -10.205015182495117,
      "eval_rewards/rejected": -122.52478790283203,
      "eval_runtime": 356.0942,
      "eval_samples_per_second": 0.842,
      "eval_steps_per_second": 0.211,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 145,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}