|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9473684210526314, |
|
"eval_steps": 10, |
|
"global_step": 21, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14035087719298245, |
|
"grad_norm": 36.83965274682509, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -0.6751565337181091, |
|
"logits/rejected": -0.680110514163971, |
|
"logps/chosen": -52.487876892089844, |
|
"logps/rejected": -58.423255920410156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2807017543859649, |
|
"grad_norm": 36.26353717517849, |
|
"learning_rate": 6.666666666666667e-06, |
|
"logits/chosen": -0.7261925339698792, |
|
"logits/rejected": -0.7052676677703857, |
|
"logps/chosen": -51.76400375366211, |
|
"logps/rejected": -58.045860290527344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 6.35145459559084, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": -0.6268625259399414, |
|
"logits/rejected": -0.44894033670425415, |
|
"logps/chosen": -42.51791763305664, |
|
"logps/rejected": -77.13123321533203, |
|
"loss": 0.1248, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.8370370864868164, |
|
"rewards/margins": 2.879814386367798, |
|
"rewards/rejected": -2.0427772998809814, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.5614035087719298, |
|
"grad_norm": 6.045707313215622, |
|
"learning_rate": 9.924038765061042e-06, |
|
"logits/chosen": -0.49778643250465393, |
|
"logits/rejected": -0.3794001340866089, |
|
"logps/chosen": -40.093902587890625, |
|
"logps/rejected": -100.6180191040039, |
|
"loss": 0.0342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0513410568237305, |
|
"rewards/margins": 5.476510047912598, |
|
"rewards/rejected": -4.425168514251709, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 3.5732460989636903, |
|
"learning_rate": 9.698463103929542e-06, |
|
"logits/chosen": -0.1468430608510971, |
|
"logits/rejected": -0.014196997508406639, |
|
"logps/chosen": -47.64195251464844, |
|
"logps/rejected": -108.50719451904297, |
|
"loss": 0.035, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.4784576892852783, |
|
"rewards/margins": 5.548664093017578, |
|
"rewards/rejected": -5.070206165313721, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 5.73507612352672, |
|
"learning_rate": 9.330127018922195e-06, |
|
"logits/chosen": -0.45869290828704834, |
|
"logits/rejected": -0.35349956154823303, |
|
"logps/chosen": -23.860214233398438, |
|
"logps/rejected": -72.7856216430664, |
|
"loss": 0.0381, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.7691407203674316, |
|
"rewards/margins": 4.281482696533203, |
|
"rewards/rejected": -1.512341856956482, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.9824561403508771, |
|
"grad_norm": 0.20133266492379556, |
|
"learning_rate": 8.83022221559489e-06, |
|
"logits/chosen": 0.02529796212911606, |
|
"logits/rejected": -0.027504097670316696, |
|
"logps/chosen": -20.159576416015625, |
|
"logps/rejected": -136.86343383789062, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.0319323539733887, |
|
"rewards/margins": 10.98936653137207, |
|
"rewards/rejected": -7.957433700561523, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.1228070175438596, |
|
"grad_norm": 1.538785447893909, |
|
"learning_rate": 8.213938048432697e-06, |
|
"logits/chosen": -0.09107446670532227, |
|
"logits/rejected": -0.030364712700247765, |
|
"logps/chosen": -31.587915420532227, |
|
"logps/rejected": -148.98675537109375, |
|
"loss": 0.0988, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.1359970569610596, |
|
"rewards/margins": 11.210424423217773, |
|
"rewards/rejected": -9.074427604675293, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 1.263157894736842, |
|
"grad_norm": 2.484440900292177, |
|
"learning_rate": 7.500000000000001e-06, |
|
"logits/chosen": -0.021966181695461273, |
|
"logits/rejected": -0.035571545362472534, |
|
"logps/chosen": -22.42191505432129, |
|
"logps/rejected": -139.77471923828125, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9303808212280273, |
|
"rewards/margins": 11.172625541687012, |
|
"rewards/rejected": -8.242246627807617, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 2.063248053979488, |
|
"learning_rate": 6.710100716628345e-06, |
|
"logits/chosen": -0.1402013897895813, |
|
"logits/rejected": -0.09476425498723984, |
|
"logps/chosen": -22.920854568481445, |
|
"logps/rejected": -139.89112854003906, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.8295440673828125, |
|
"rewards/margins": 11.169378280639648, |
|
"rewards/rejected": -8.339835166931152, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"eval_logits/chosen": -0.5006358027458191, |
|
"eval_logits/rejected": -0.5087898373603821, |
|
"eval_logps/chosen": -21.234792709350586, |
|
"eval_logps/rejected": -123.04338073730469, |
|
"eval_loss": 0.07855287194252014, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 3.130305767059326, |
|
"eval_rewards/margins": 9.576081275939941, |
|
"eval_rewards/rejected": -6.445775032043457, |
|
"eval_runtime": 16.4971, |
|
"eval_samples_per_second": 6.062, |
|
"eval_steps_per_second": 3.031, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.543859649122807, |
|
"grad_norm": 2.01867368401983, |
|
"learning_rate": 5.8682408883346535e-06, |
|
"logits/chosen": -0.3766348659992218, |
|
"logits/rejected": -0.22392578423023224, |
|
"logps/chosen": -23.370777130126953, |
|
"logps/rejected": -121.3629150390625, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.7753195762634277, |
|
"rewards/margins": 9.217280387878418, |
|
"rewards/rejected": -6.441961765289307, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.6842105263157894, |
|
"grad_norm": 4.079342276288671, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -0.5493069887161255, |
|
"logits/rejected": -0.3390986919403076, |
|
"logps/chosen": -26.047971725463867, |
|
"logps/rejected": -90.44429016113281, |
|
"loss": 0.0339, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.553872585296631, |
|
"rewards/margins": 5.881150722503662, |
|
"rewards/rejected": -3.3272786140441895, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.8245614035087718, |
|
"grad_norm": 1.7556159148296524, |
|
"learning_rate": 4.131759111665349e-06, |
|
"logits/chosen": -0.4146791398525238, |
|
"logits/rejected": -0.286813884973526, |
|
"logps/chosen": -24.490224838256836, |
|
"logps/rejected": -102.24156188964844, |
|
"loss": 0.0562, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.6847214698791504, |
|
"rewards/margins": 7.214890003204346, |
|
"rewards/rejected": -4.5301690101623535, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.9649122807017543, |
|
"grad_norm": 1.5851709952279243, |
|
"learning_rate": 3.289899283371657e-06, |
|
"logits/chosen": -0.44919806718826294, |
|
"logits/rejected": -0.38574808835983276, |
|
"logps/chosen": -21.76868438720703, |
|
"logps/rejected": -93.4135513305664, |
|
"loss": 0.0404, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.8682363033294678, |
|
"rewards/margins": 6.415185928344727, |
|
"rewards/rejected": -3.5469493865966797, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.8663363353210317, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"logits/chosen": -0.3753425180912018, |
|
"logits/rejected": -0.3148278295993805, |
|
"logps/chosen": -27.85059928894043, |
|
"logps/rejected": -96.494140625, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.4423599243164062, |
|
"rewards/margins": 6.41550874710083, |
|
"rewards/rejected": -3.9731483459472656, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.245614035087719, |
|
"grad_norm": 0.46918563058013124, |
|
"learning_rate": 1.7860619515673034e-06, |
|
"logits/chosen": -0.37954726815223694, |
|
"logits/rejected": -0.3316181004047394, |
|
"logps/chosen": -25.504596710205078, |
|
"logps/rejected": -102.33647155761719, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5688281059265137, |
|
"rewards/margins": 7.0828657150268555, |
|
"rewards/rejected": -4.514037132263184, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 2.3859649122807016, |
|
"grad_norm": 1.3915012816414571, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"logits/chosen": -0.32976603507995605, |
|
"logits/rejected": -0.24117198586463928, |
|
"logps/chosen": -22.648927688598633, |
|
"logps/rejected": -111.1461181640625, |
|
"loss": 0.0722, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 2.808568239212036, |
|
"rewards/margins": 8.266373634338379, |
|
"rewards/rejected": -5.457805633544922, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 2.526315789473684, |
|
"grad_norm": 0.7864294623647854, |
|
"learning_rate": 6.698729810778065e-07, |
|
"logits/chosen": -0.31580400466918945, |
|
"logits/rejected": -0.23523080348968506, |
|
"logps/chosen": -21.38547706604004, |
|
"logps/rejected": -112.5594482421875, |
|
"loss": 0.0132, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.9571423530578613, |
|
"rewards/margins": 8.440750122070312, |
|
"rewards/rejected": -5.483607769012451, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 1.6842586899427825, |
|
"learning_rate": 3.015368960704584e-07, |
|
"logits/chosen": -0.227472722530365, |
|
"logits/rejected": -0.19328871369361877, |
|
"logps/chosen": -18.14565086364746, |
|
"logps/rejected": -115.78607177734375, |
|
"loss": 0.0465, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.302818536758423, |
|
"rewards/margins": 9.093223571777344, |
|
"rewards/rejected": -5.790404796600342, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 0.37240158397575107, |
|
"learning_rate": 7.59612349389599e-08, |
|
"logits/chosen": -0.28918278217315674, |
|
"logits/rejected": -0.20876720547676086, |
|
"logps/chosen": -20.228591918945312, |
|
"logps/rejected": -110.49909210205078, |
|
"loss": 0.0085, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.1952502727508545, |
|
"rewards/margins": 8.55958366394043, |
|
"rewards/rejected": -5.36433219909668, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"eval_logits/chosen": -0.5716415643692017, |
|
"eval_logits/rejected": -0.5896289944648743, |
|
"eval_logps/chosen": -19.139745712280273, |
|
"eval_logps/rejected": -113.21296691894531, |
|
"eval_loss": 0.07458853721618652, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 3.339810371398926, |
|
"eval_rewards/margins": 8.802544593811035, |
|
"eval_rewards/rejected": -5.462734222412109, |
|
"eval_runtime": 18.3879, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 2.719, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.9473684210526314, |
|
"grad_norm": 2.3675721766606554, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.3103935122489929, |
|
"logits/rejected": -0.23779892921447754, |
|
"logps/chosen": -24.52212905883789, |
|
"logps/rejected": -107.23027038574219, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 2.7476420402526855, |
|
"rewards/margins": 7.737358093261719, |
|
"rewards/rejected": -4.989716529846191, |
|
"step": 21 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 21, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 5, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1322411360256.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|