{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0881863560732112,
  "eval_steps": 25,
  "global_step": 29,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1064891846921797,
      "grad_norm": 0.2210182100534439,
      "learning_rate": 5e-05,
      "loss": 0.5738,
      "step": 1
    },
    {
      "epoch": 0.1064891846921797,
      "eval_loss": 0.6764118671417236,
      "eval_runtime": 2.922,
      "eval_samples_per_second": 17.111,
      "eval_steps_per_second": 4.449,
      "step": 1
    },
    {
      "epoch": 0.2129783693843594,
      "grad_norm": 0.2817445397377014,
      "learning_rate": 0.0001,
      "loss": 0.6376,
      "step": 2
    },
    {
      "epoch": 0.3194675540765391,
      "grad_norm": 0.22999179363250732,
      "learning_rate": 9.969572609838744e-05,
      "loss": 0.5983,
      "step": 3
    },
    {
      "epoch": 0.4259567387687188,
      "grad_norm": 0.20780327916145325,
      "learning_rate": 9.878701917609207e-05,
      "loss": 0.6238,
      "step": 4
    },
    {
      "epoch": 0.5324459234608985,
      "grad_norm": 0.18925385177135468,
      "learning_rate": 9.728616793536588e-05,
      "loss": 0.5685,
      "step": 5
    },
    {
      "epoch": 0.6389351081530782,
      "grad_norm": 0.3107340335845947,
      "learning_rate": 9.521346881455356e-05,
      "loss": 0.5353,
      "step": 6
    },
    {
      "epoch": 0.7454242928452579,
      "grad_norm": 0.2847810387611389,
      "learning_rate": 9.259695151358214e-05,
      "loss": 0.6103,
      "step": 7
    },
    {
      "epoch": 0.8519134775374376,
      "grad_norm": 0.14824841916561127,
      "learning_rate": 8.947199994035401e-05,
      "loss": 0.5314,
      "step": 8
    },
    {
      "epoch": 0.9584026622296173,
      "grad_norm": 0.12647435069084167,
      "learning_rate": 8.588087370409303e-05,
      "loss": 0.6292,
      "step": 9
    },
    {
      "epoch": 1.064891846921797,
      "grad_norm": 0.1318836510181427,
      "learning_rate": 8.187213662662538e-05,
      "loss": 0.8062,
      "step": 10
    },
    {
      "epoch": 1.1713810316139768,
      "grad_norm": 0.09517844766378403,
      "learning_rate": 7.75e-05,
      "loss": 0.6232,
      "step": 11
    },
    {
      "epoch": 1.2778702163061564,
      "grad_norm": 0.08089004456996918,
      "learning_rate": 7.282358947176207e-05,
      "loss": 0.4146,
      "step": 12
    },
    {
      "epoch": 1.3843594009983362,
      "grad_norm": 0.09264088422060013,
      "learning_rate": 6.790614547199907e-05,
      "loss": 0.5849,
      "step": 13
    },
    {
      "epoch": 1.4908485856905158,
      "grad_norm": 0.06309893727302551,
      "learning_rate": 6.281416799501188e-05,
      "loss": 0.3409,
      "step": 14
    },
    {
      "epoch": 1.5973377703826954,
      "grad_norm": 0.0876927301287651,
      "learning_rate": 5.761651730097142e-05,
      "loss": 0.6784,
      "step": 15
    },
    {
      "epoch": 1.7038269550748752,
      "grad_norm": 0.06547556817531586,
      "learning_rate": 5.23834826990286e-05,
      "loss": 0.473,
      "step": 16
    },
    {
      "epoch": 1.8103161397670549,
      "grad_norm": 0.08073204755783081,
      "learning_rate": 4.718583200498814e-05,
      "loss": 0.5896,
      "step": 17
    },
    {
      "epoch": 1.9168053244592347,
      "grad_norm": 0.08649210631847382,
      "learning_rate": 4.209385452800095e-05,
      "loss": 0.5938,
      "step": 18
    },
    {
      "epoch": 2.0232945091514143,
      "grad_norm": 0.11379551887512207,
      "learning_rate": 3.717641052823795e-05,
      "loss": 0.8089,
      "step": 19
    },
    {
      "epoch": 2.129783693843594,
      "grad_norm": 0.07528755068778992,
      "learning_rate": 3.250000000000001e-05,
      "loss": 0.5308,
      "step": 20
    },
    {
      "epoch": 2.2362728785357735,
      "grad_norm": 0.056557416915893555,
      "learning_rate": 2.8127863373374635e-05,
      "loss": 0.4656,
      "step": 21
    },
    {
      "epoch": 2.3427620632279536,
      "grad_norm": 0.07839695364236832,
      "learning_rate": 2.4119126295906998e-05,
      "loss": 0.6126,
      "step": 22
    },
    {
      "epoch": 2.449251247920133,
      "grad_norm": 0.05538203939795494,
      "learning_rate": 2.0528000059645997e-05,
      "loss": 0.4561,
      "step": 23
    },
    {
      "epoch": 2.5557404326123128,
      "grad_norm": 0.0652206763625145,
      "learning_rate": 1.740304848641787e-05,
      "loss": 0.5319,
      "step": 24
    },
    {
      "epoch": 2.6622296173044924,
      "grad_norm": 0.054131075739860535,
      "learning_rate": 1.4786531185446454e-05,
      "loss": 0.4677,
      "step": 25
    },
    {
      "epoch": 2.6622296173044924,
      "eval_loss": 0.5396596193313599,
      "eval_runtime": 2.8224,
      "eval_samples_per_second": 17.716,
      "eval_steps_per_second": 4.606,
      "step": 25
    },
    {
      "epoch": 2.7687188019966724,
      "grad_norm": 0.05989672616124153,
      "learning_rate": 1.2713832064634126e-05,
      "loss": 0.5111,
      "step": 26
    },
    {
      "epoch": 2.875207986688852,
      "grad_norm": 0.06187813729047775,
      "learning_rate": 1.1212980823907931e-05,
      "loss": 0.5551,
      "step": 27
    },
    {
      "epoch": 2.9816971713810316,
      "grad_norm": 0.08973059058189392,
      "learning_rate": 1.0304273901612566e-05,
      "loss": 0.7605,
      "step": 28
    },
    {
      "epoch": 3.0881863560732112,
      "grad_norm": 0.0628635361790657,
      "learning_rate": 1e-05,
      "loss": 0.5706,
      "step": 29
    }
  ],
  "logging_steps": 1,
  "max_steps": 29,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 60,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.217251561273557e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}