{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.971563981042654, "eval_steps": 100, "global_step": 104, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018957345971563982, "grad_norm": 17.039526624858368, "learning_rate": 4.545454545454545e-08, "logits/chosen": -16.87786293029785, "logits/rejected": -17.083940505981445, "logps/chosen": -334.51202392578125, "logps/rejected": -430.7342224121094, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.1895734597156398, "grad_norm": 17.02641660622309, "learning_rate": 4.545454545454545e-07, "logits/chosen": -17.35299301147461, "logits/rejected": -17.494272232055664, "logps/chosen": -402.1957702636719, "logps/rejected": -436.8443603515625, "loss": 0.6942, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.0008524802979081869, "rewards/margins": 0.0015829234616830945, "rewards/rejected": -0.000730442872736603, "step": 10 }, { "epoch": 0.3791469194312796, "grad_norm": 15.563346495094395, "learning_rate": 4.885348141000122e-07, "logits/chosen": -16.698389053344727, "logits/rejected": -16.837512969970703, "logps/chosen": -379.29327392578125, "logps/rejected": -413.91607666015625, "loss": 0.689, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": 0.007832124829292297, "rewards/margins": -0.00933709554374218, "rewards/rejected": 0.017169222235679626, "step": 20 }, { "epoch": 0.5687203791469194, "grad_norm": 17.011437827705663, "learning_rate": 4.5025027361734613e-07, "logits/chosen": -17.711877822875977, "logits/rejected": -17.14129638671875, "logps/chosen": -398.89691162109375, "logps/rejected": -415.79156494140625, "loss": 0.6725, "rewards/accuracies": 0.606249988079071, "rewards/chosen": 0.08761711418628693, "rewards/margins": 0.03942377120256424, "rewards/rejected": 0.04819334298372269, "step": 30 }, { "epoch": 0.7582938388625592, "grad_norm": 14.678769570259123, "learning_rate": 3.893311157806091e-07, "logits/chosen": -17.091754913330078, "logits/rejected": -16.351659774780273, "logps/chosen": -358.50946044921875, "logps/rejected": -351.41278076171875, "loss": 0.6555, "rewards/accuracies": 0.65625, "rewards/chosen": 0.22874267399311066, "rewards/margins": 0.08902247995138168, "rewards/rejected": 0.13972017168998718, "step": 40 }, { "epoch": 0.9478672985781991, "grad_norm": 16.97842458896973, "learning_rate": 3.126631330646801e-07, "logits/chosen": -18.3736515045166, "logits/rejected": -18.453527450561523, "logps/chosen": -411.6908264160156, "logps/rejected": -464.92529296875, "loss": 0.6319, "rewards/accuracies": 0.71875, "rewards/chosen": 0.4013705253601074, "rewards/margins": 0.18392089009284973, "rewards/rejected": 0.2174496352672577, "step": 50 }, { "epoch": 1.1374407582938388, "grad_norm": 14.380477446845697, "learning_rate": 2.2891223348923882e-07, "logits/chosen": -18.0875301361084, "logits/rejected": -17.782365798950195, "logps/chosen": -400.21282958984375, "logps/rejected": -429.13922119140625, "loss": 0.6034, "rewards/accuracies": 0.75, "rewards/chosen": 0.38667935132980347, "rewards/margins": 0.23242299258708954, "rewards/rejected": 0.15425635874271393, "step": 60 }, { "epoch": 1.3270142180094786, "grad_norm": 13.517572081923934, "learning_rate": 1.4754491880085317e-07, "logits/chosen": -17.54964828491211, "logits/rejected": -17.380077362060547, "logps/chosen": -369.05426025390625, "logps/rejected": -405.892578125, "loss": 0.5985, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.3417219817638397, "rewards/margins": 0.23048743605613708, "rewards/rejected": 0.11123454570770264, "step": 70 }, { "epoch": 1.5165876777251186, "grad_norm": 13.094353030731684, "learning_rate": 7.775827023107834e-08, "logits/chosen": -17.020626068115234, "logits/rejected": -17.626625061035156, "logps/chosen": -355.83404541015625, "logps/rejected": -411.6244201660156, "loss": 0.5786, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": 0.23768803477287292, "rewards/margins": 0.33307021856307983, "rewards/rejected": -0.09538215398788452, "step": 80 }, { "epoch": 1.7061611374407581, "grad_norm": 14.853001845607317, "learning_rate": 2.7440387297912122e-08, "logits/chosen": -17.405044555664062, "logits/rejected": -17.63189697265625, "logps/chosen": -387.94354248046875, "logps/rejected": -437.82366943359375, "loss": 0.5742, "rewards/accuracies": 0.793749988079071, "rewards/chosen": 0.27590411901474, "rewards/margins": 0.3552981913089752, "rewards/rejected": -0.07939404994249344, "step": 90 }, { "epoch": 1.8957345971563981, "grad_norm": 13.859137885848625, "learning_rate": 2.27878296044029e-09, "logits/chosen": -17.7724552154541, "logits/rejected": -17.65437889099121, "logps/chosen": -380.60955810546875, "logps/rejected": -411.00164794921875, "loss": 0.5725, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.2799278199672699, "rewards/margins": 0.2864844799041748, "rewards/rejected": -0.006556662730872631, "step": 100 }, { "epoch": 1.8957345971563981, "eval_logits/chosen": -16.585969924926758, "eval_logits/rejected": -15.948739051818848, "eval_logps/chosen": -363.741943359375, "eval_logps/rejected": -363.7130432128906, "eval_loss": 0.5959563255310059, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": 0.22609315812587738, "eval_rewards/margins": 0.2597140967845917, "eval_rewards/rejected": -0.03362090513110161, "eval_runtime": 8.7004, "eval_samples_per_second": 86.203, "eval_steps_per_second": 2.758, "step": 100 }, { "epoch": 1.971563981042654, "step": 104, "total_flos": 0.0, "train_loss": 0.6262502945386447, "train_runtime": 431.1142, "train_samples_per_second": 31.314, "train_steps_per_second": 0.241 } ], "logging_steps": 10, "max_steps": 104, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }