{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 52,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 6.388934227309893,
      "learning_rate": 8.333333333333333e-08,
      "logits/chosen": -1.2247042655944824,
      "logits/rejected": -1.0684211254119873,
      "logps/chosen": -569.8499145507812,
      "logps/rejected": -1057.6484375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "grad_norm": 6.575619838412587,
      "learning_rate": 4.907293218369498e-07,
      "logits/chosen": -1.1266288757324219,
      "logits/rejected": -0.9588133692741394,
      "logps/chosen": -644.7501831054688,
      "logps/rejected": -896.37548828125,
      "loss": 0.6907,
      "rewards/accuracies": 0.5347222089767456,
      "rewards/chosen": -0.0003651145671028644,
      "rewards/margins": 0.003978920169174671,
      "rewards/rejected": -0.004344034940004349,
      "step": 10
    },
    {
      "epoch": 0.38,
      "grad_norm": 6.33044990257594,
      "learning_rate": 3.941700805287168e-07,
      "logits/chosen": -1.1694377660751343,
      "logits/rejected": -0.9742730855941772,
      "logps/chosen": -553.2115478515625,
      "logps/rejected": -933.1784057617188,
      "loss": 0.66,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -0.0070250085555016994,
      "rewards/margins": 0.06466411054134369,
      "rewards/rejected": -0.07168911397457123,
      "step": 20
    },
    {
      "epoch": 0.58,
      "grad_norm": 7.898094155565296,
      "learning_rate": 2.3293939665883228e-07,
      "logits/chosen": -1.121983528137207,
      "logits/rejected": -0.9881827235221863,
      "logps/chosen": -594.9716186523438,
      "logps/rejected": -1039.31982421875,
      "loss": 0.5638,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -0.06966790556907654,
      "rewards/margins": 0.3061855733394623,
      "rewards/rejected": -0.3758534789085388,
      "step": 30
    },
    {
      "epoch": 0.77,
      "grad_norm": 9.30835250885683,
      "learning_rate": 7.936171419533652e-08,
      "logits/chosen": -1.0671305656433105,
      "logits/rejected": -0.9684460759162903,
      "logps/chosen": -680.4083251953125,
      "logps/rejected": -1006.2408447265625,
      "loss": 0.4643,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.20575208961963654,
      "rewards/margins": 0.7841703295707703,
      "rewards/rejected": -0.9899223446846008,
      "step": 40
    },
    {
      "epoch": 0.96,
      "grad_norm": 8.502404185464833,
      "learning_rate": 2.328513490917311e-09,
      "logits/chosen": -1.0291626453399658,
      "logits/rejected": -0.9718970060348511,
      "logps/chosen": -671.4656372070312,
      "logps/rejected": -1046.7633056640625,
      "loss": 0.4299,
      "rewards/accuracies": 0.8687499761581421,
      "rewards/chosen": -0.2569717466831207,
      "rewards/margins": 0.8754018545150757,
      "rewards/rejected": -1.132373571395874,
      "step": 50
    },
    {
      "epoch": 1.0,
      "step": 52,
      "total_flos": 0.0,
      "train_loss": 0.5552032154340011,
      "train_runtime": 696.2336,
      "train_samples_per_second": 4.777,
      "train_steps_per_second": 0.075
    }
  ],
  "logging_steps": 10,
  "max_steps": 52,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}