{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.015958507879513265,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015958507879513265,
      "eval_loss": 2.050989866256714,
      "eval_runtime": 454.429,
      "eval_samples_per_second": 23.225,
      "eval_steps_per_second": 2.905,
      "step": 1
    },
    {
      "epoch": 0.00047875523638539794,
      "grad_norm": 0.9697971343994141,
      "learning_rate": 1.5e-05,
      "loss": 1.9725,
      "step": 3
    },
    {
      "epoch": 0.0009575104727707959,
      "grad_norm": 0.901890754699707,
      "learning_rate": 3e-05,
      "loss": 2.0065,
      "step": 6
    },
    {
      "epoch": 0.001436265709156194,
      "grad_norm": 0.9856873154640198,
      "learning_rate": 4.5e-05,
      "loss": 1.931,
      "step": 9
    },
    {
      "epoch": 0.001436265709156194,
      "eval_loss": 1.9257014989852905,
      "eval_runtime": 457.2135,
      "eval_samples_per_second": 23.083,
      "eval_steps_per_second": 2.887,
      "step": 9
    },
    {
      "epoch": 0.0019150209455415918,
      "grad_norm": 0.7096590399742126,
      "learning_rate": 4.993910125649561e-05,
      "loss": 1.7845,
      "step": 12
    },
    {
      "epoch": 0.0023937761819269898,
      "grad_norm": 0.726519763469696,
      "learning_rate": 4.962019382530521e-05,
      "loss": 1.8313,
      "step": 15
    },
    {
      "epoch": 0.002872531418312388,
      "grad_norm": 0.6210381388664246,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 1.8015,
      "step": 18
    },
    {
      "epoch": 0.002872531418312388,
      "eval_loss": 1.7303786277770996,
      "eval_runtime": 456.9299,
      "eval_samples_per_second": 23.098,
      "eval_steps_per_second": 2.889,
      "step": 18
    },
    {
      "epoch": 0.0033512866546977858,
      "grad_norm": 0.6193668246269226,
      "learning_rate": 4.817959636416969e-05,
      "loss": 1.7996,
      "step": 21
    },
    {
      "epoch": 0.0038300418910831835,
      "grad_norm": 0.6268954873085022,
      "learning_rate": 4.707368982147318e-05,
      "loss": 1.6603,
      "step": 24
    },
    {
      "epoch": 0.004308797127468581,
      "grad_norm": 0.5985816121101379,
      "learning_rate": 4.572593931387604e-05,
      "loss": 1.716,
      "step": 27
    },
    {
      "epoch": 0.004308797127468581,
      "eval_loss": 1.707382321357727,
      "eval_runtime": 456.9479,
      "eval_samples_per_second": 23.097,
      "eval_steps_per_second": 2.889,
      "step": 27
    },
    {
      "epoch": 0.0047875523638539795,
      "grad_norm": 0.5452908873558044,
      "learning_rate": 4.415111107797445e-05,
      "loss": 1.7306,
      "step": 30
    },
    {
      "epoch": 0.005266307600239378,
      "grad_norm": 0.5148115754127502,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 1.658,
      "step": 33
    },
    {
      "epoch": 0.005745062836624776,
      "grad_norm": 0.5095010995864868,
      "learning_rate": 4.039153688314145e-05,
      "loss": 1.7653,
      "step": 36
    },
    {
      "epoch": 0.005745062836624776,
      "eval_loss": 1.6911619901657104,
      "eval_runtime": 457.3928,
      "eval_samples_per_second": 23.074,
      "eval_steps_per_second": 2.886,
      "step": 36
    },
    {
      "epoch": 0.006223818073010173,
      "grad_norm": 0.480747789144516,
      "learning_rate": 3.824798160583012e-05,
      "loss": 1.6363,
      "step": 39
    },
    {
      "epoch": 0.0067025733093955715,
      "grad_norm": 0.4569284915924072,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 1.6942,
      "step": 42
    },
    {
      "epoch": 0.00718132854578097,
      "grad_norm": 0.5147221684455872,
      "learning_rate": 3.355050358314172e-05,
      "loss": 1.7732,
      "step": 45
    },
    {
      "epoch": 0.00718132854578097,
      "eval_loss": 1.6826040744781494,
      "eval_runtime": 457.1352,
      "eval_samples_per_second": 23.087,
      "eval_steps_per_second": 2.888,
      "step": 45
    },
    {
      "epoch": 0.007660083782166367,
      "grad_norm": 0.46845507621765137,
      "learning_rate": 3.104804738999169e-05,
      "loss": 1.6482,
      "step": 48
    },
    {
      "epoch": 0.008138839018551766,
      "grad_norm": 0.5122058391571045,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 1.6855,
      "step": 51
    },
    {
      "epoch": 0.008617594254937163,
      "grad_norm": 0.46340513229370117,
      "learning_rate": 2.587248741756253e-05,
      "loss": 1.5771,
      "step": 54
    },
    {
      "epoch": 0.008617594254937163,
      "eval_loss": 1.678298830986023,
      "eval_runtime": 457.7537,
      "eval_samples_per_second": 23.056,
      "eval_steps_per_second": 2.884,
      "step": 54
    },
    {
      "epoch": 0.00909634949132256,
      "grad_norm": 0.4864901602268219,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 1.6974,
      "step": 57
    },
    {
      "epoch": 0.009575104727707959,
      "grad_norm": 0.47456270456314087,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 1.7266,
      "step": 60
    },
    {
      "epoch": 0.010053859964093357,
      "grad_norm": 0.44960471987724304,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 1.6703,
      "step": 63
    },
    {
      "epoch": 0.010053859964093357,
      "eval_loss": 1.6748311519622803,
      "eval_runtime": 457.1158,
      "eval_samples_per_second": 23.088,
      "eval_steps_per_second": 2.888,
      "step": 63
    },
    {
      "epoch": 0.010532615200478756,
      "grad_norm": 0.5045154690742493,
      "learning_rate": 1.56348351646022e-05,
      "loss": 1.6842,
      "step": 66
    },
    {
      "epoch": 0.011011370436864154,
      "grad_norm": 0.49012699723243713,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 1.6247,
      "step": 69
    },
    {
      "epoch": 0.011490125673249552,
      "grad_norm": 0.5065352320671082,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 1.7605,
      "step": 72
    },
    {
      "epoch": 0.011490125673249552,
      "eval_loss": 1.6728583574295044,
      "eval_runtime": 457.0738,
      "eval_samples_per_second": 23.09,
      "eval_steps_per_second": 2.888,
      "step": 72
    },
    {
      "epoch": 0.011968880909634948,
      "grad_norm": 0.5055304169654846,
      "learning_rate": 8.930309757836517e-06,
      "loss": 1.5792,
      "step": 75
    },
    {
      "epoch": 0.012447636146020347,
      "grad_norm": 0.46882903575897217,
      "learning_rate": 7.016504991533726e-06,
      "loss": 1.714,
      "step": 78
    },
    {
      "epoch": 0.012926391382405745,
      "grad_norm": 0.5146296620368958,
      "learning_rate": 5.299731159831953e-06,
      "loss": 1.5839,
      "step": 81
    },
    {
      "epoch": 0.012926391382405745,
      "eval_loss": 1.6719353199005127,
      "eval_runtime": 456.9575,
      "eval_samples_per_second": 23.096,
      "eval_steps_per_second": 2.889,
      "step": 81
    },
    {
      "epoch": 0.013405146618791143,
      "grad_norm": 0.4731264114379883,
      "learning_rate": 3.798797596089351e-06,
      "loss": 1.7055,
      "step": 84
    },
    {
      "epoch": 0.013883901855176541,
      "grad_norm": 0.5054563879966736,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 1.5845,
      "step": 87
    },
    {
      "epoch": 0.01436265709156194,
      "grad_norm": 0.46672162413597107,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 1.7207,
      "step": 90
    },
    {
      "epoch": 0.01436265709156194,
      "eval_loss": 1.671474575996399,
      "eval_runtime": 456.9871,
      "eval_samples_per_second": 23.095,
      "eval_steps_per_second": 2.888,
      "step": 90
    },
    {
      "epoch": 0.014841412327947338,
      "grad_norm": 0.5002412796020508,
      "learning_rate": 7.426068431000882e-07,
      "loss": 1.6392,
      "step": 93
    },
    {
      "epoch": 0.015320167564332734,
      "grad_norm": 0.47195887565612793,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 1.6559,
      "step": 96
    },
    {
      "epoch": 0.015798922800718134,
      "grad_norm": 0.513578474521637,
      "learning_rate": 1.522932452260595e-08,
      "loss": 1.7077,
      "step": 99
    },
    {
      "epoch": 0.015798922800718134,
      "eval_loss": 1.6713879108428955,
      "eval_runtime": 457.4203,
      "eval_samples_per_second": 23.073,
      "eval_steps_per_second": 2.886,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.75001861668864e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}