|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.3590033975084936, |
|
"eval_steps": 20, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09060022650056625, |
|
"grad_norm": 4.824601173400879, |
|
"learning_rate": 1.9393939393939395e-05, |
|
"loss": 6.3414, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09060022650056625, |
|
"eval_loss": 5.462944984436035, |
|
"eval_runtime": 169.7392, |
|
"eval_samples_per_second": 2.327, |
|
"eval_steps_per_second": 0.583, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1812004530011325, |
|
"grad_norm": 3.5273354053497314, |
|
"learning_rate": 1.8585858585858588e-05, |
|
"loss": 4.9204, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1812004530011325, |
|
"eval_loss": 4.40663480758667, |
|
"eval_runtime": 169.7659, |
|
"eval_samples_per_second": 2.327, |
|
"eval_steps_per_second": 0.583, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2718006795016987, |
|
"grad_norm": 2.5483546257019043, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 4.0241, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2718006795016987, |
|
"eval_loss": 3.6545450687408447, |
|
"eval_runtime": 169.7082, |
|
"eval_samples_per_second": 2.328, |
|
"eval_steps_per_second": 0.583, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.362400906002265, |
|
"grad_norm": 1.4762500524520874, |
|
"learning_rate": 1.6969696969696972e-05, |
|
"loss": 3.4114, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.362400906002265, |
|
"eval_loss": 3.1997323036193848, |
|
"eval_runtime": 169.459, |
|
"eval_samples_per_second": 2.331, |
|
"eval_steps_per_second": 0.584, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45300113250283125, |
|
"grad_norm": 1.2465909719467163, |
|
"learning_rate": 1.616161616161616e-05, |
|
"loss": 3.0527, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.45300113250283125, |
|
"eval_loss": 2.9332973957061768, |
|
"eval_runtime": 169.4702, |
|
"eval_samples_per_second": 2.331, |
|
"eval_steps_per_second": 0.584, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5436013590033975, |
|
"grad_norm": 1.2837079763412476, |
|
"learning_rate": 1.5353535353535354e-05, |
|
"loss": 2.8401, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5436013590033975, |
|
"eval_loss": 2.765261173248291, |
|
"eval_runtime": 169.5654, |
|
"eval_samples_per_second": 2.329, |
|
"eval_steps_per_second": 0.584, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6342015855039638, |
|
"grad_norm": 1.069353699684143, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 2.7202, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6342015855039638, |
|
"eval_loss": 2.654095411300659, |
|
"eval_runtime": 169.5632, |
|
"eval_samples_per_second": 2.33, |
|
"eval_steps_per_second": 0.584, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.72480181200453, |
|
"grad_norm": 1.0665814876556396, |
|
"learning_rate": 1.3737373737373739e-05, |
|
"loss": 2.605, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.72480181200453, |
|
"eval_loss": 2.576014995574951, |
|
"eval_runtime": 169.9075, |
|
"eval_samples_per_second": 2.325, |
|
"eval_steps_per_second": 0.583, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8154020385050963, |
|
"grad_norm": 1.076709508895874, |
|
"learning_rate": 1.2929292929292931e-05, |
|
"loss": 2.5533, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8154020385050963, |
|
"eval_loss": 2.519667148590088, |
|
"eval_runtime": 169.6071, |
|
"eval_samples_per_second": 2.329, |
|
"eval_steps_per_second": 0.584, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9060022650056625, |
|
"grad_norm": 1.0686030387878418, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 2.5004, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9060022650056625, |
|
"eval_loss": 2.4773340225219727, |
|
"eval_runtime": 169.6567, |
|
"eval_samples_per_second": 2.328, |
|
"eval_steps_per_second": 0.584, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9966024915062288, |
|
"grad_norm": 1.1253015995025635, |
|
"learning_rate": 1.1313131313131314e-05, |
|
"loss": 2.4613, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9966024915062288, |
|
"eval_loss": 2.444694995880127, |
|
"eval_runtime": 169.7657, |
|
"eval_samples_per_second": 2.327, |
|
"eval_steps_per_second": 0.583, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.087202718006795, |
|
"grad_norm": 1.1171083450317383, |
|
"learning_rate": 1.0505050505050507e-05, |
|
"loss": 2.4456, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.087202718006795, |
|
"eval_loss": 2.4184916019439697, |
|
"eval_runtime": 169.7027, |
|
"eval_samples_per_second": 2.328, |
|
"eval_steps_per_second": 0.583, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1778029445073612, |
|
"grad_norm": 1.1789259910583496, |
|
"learning_rate": 9.696969696969698e-06, |
|
"loss": 2.4151, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.1778029445073612, |
|
"eval_loss": 2.397007465362549, |
|
"eval_runtime": 169.6356, |
|
"eval_samples_per_second": 2.329, |
|
"eval_steps_per_second": 0.584, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2684031710079275, |
|
"grad_norm": 1.1507657766342163, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 2.3943, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2684031710079275, |
|
"eval_loss": 2.3794679641723633, |
|
"eval_runtime": 169.6152, |
|
"eval_samples_per_second": 2.329, |
|
"eval_steps_per_second": 0.584, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3590033975084936, |
|
"grad_norm": 1.1052231788635254, |
|
"learning_rate": 8.08080808080808e-06, |
|
"loss": 2.3621, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3590033975084936, |
|
"eval_loss": 2.3650312423706055, |
|
"eval_runtime": 169.6247, |
|
"eval_samples_per_second": 2.329, |
|
"eval_steps_per_second": 0.584, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 20, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.252155720952709e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|