|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9905956112852664, |
|
"eval_steps": 500, |
|
"global_step": 79, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.4037704467773438, |
|
"logits/rejected": -2.4510624408721924, |
|
"logps/chosen": -452.3387145996094, |
|
"logps/rejected": -474.736572265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.990217055187362e-07, |
|
"logits/chosen": -2.3159937858581543, |
|
"logits/rejected": -2.326166868209839, |
|
"logps/chosen": -363.0790100097656, |
|
"logps/rejected": -444.7694091796875, |
|
"loss": 0.6767, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03609583526849747, |
|
"rewards/margins": 0.03676144406199455, |
|
"rewards/rejected": -0.07285728305578232, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.655786431300069e-07, |
|
"logits/chosen": -2.0593743324279785, |
|
"logits/rejected": -1.9507596492767334, |
|
"logps/chosen": -487.2562561035156, |
|
"logps/rejected": -611.5640869140625, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -1.0315128564834595, |
|
"rewards/margins": 0.6375976800918579, |
|
"rewards/rejected": -1.6691105365753174, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.9061232191019517e-07, |
|
"logits/chosen": -1.8185665607452393, |
|
"logits/rejected": -1.7463910579681396, |
|
"logps/chosen": -481.860107421875, |
|
"logps/rejected": -633.5632934570312, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -1.2174549102783203, |
|
"rewards/margins": 0.8040310144424438, |
|
"rewards/rejected": -2.0214858055114746, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8856223324132555e-07, |
|
"logits/chosen": -1.8887481689453125, |
|
"logits/rejected": -1.7590510845184326, |
|
"logps/chosen": -461.4515075683594, |
|
"logps/rejected": -576.8236083984375, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.7593749761581421, |
|
"rewards/chosen": -0.7517538070678711, |
|
"rewards/margins": 0.758992075920105, |
|
"rewards/rejected": -1.510745882987976, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7908455541642582e-07, |
|
"logits/chosen": -1.7753263711929321, |
|
"logits/rejected": -1.5831645727157593, |
|
"logps/chosen": -477.20263671875, |
|
"logps/rejected": -597.608642578125, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.796875, |
|
"rewards/chosen": -0.8421181440353394, |
|
"rewards/margins": 0.8058697581291199, |
|
"rewards/rejected": -1.647987961769104, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.32661172908373e-08, |
|
"logits/chosen": -1.562878966331482, |
|
"logits/rejected": -1.4060611724853516, |
|
"logps/chosen": -438.7434997558594, |
|
"logps/rejected": -601.6849365234375, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8998396992683411, |
|
"rewards/margins": 0.9105955362319946, |
|
"rewards/rejected": -1.8104350566864014, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.956279997278043e-08, |
|
"logits/chosen": -1.5130259990692139, |
|
"logits/rejected": -1.2868883609771729, |
|
"logps/chosen": -486.8975524902344, |
|
"logps/rejected": -630.5179443359375, |
|
"loss": 0.4762, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -1.0723719596862793, |
|
"rewards/margins": 0.8739804029464722, |
|
"rewards/rejected": -1.9463523626327515, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"step": 79, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5280408738534662, |
|
"train_runtime": 2579.8853, |
|
"train_samples_per_second": 7.899, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 79, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|