{
  "best_metric": 5.127074241638184,
  "best_model_checkpoint": "./outputs/llama2-13B-lora-QuArch_0_1_1_alpaca_filtered-answer-context-test-new/checkpoint-4",
  "epoch": 7.578947368421053,
  "eval_steps": 4,
  "global_step": 36,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21052631578947367,
      "eval_accuracy": 0.6153846153846154,
      "eval_loss": 5.132209300994873,
      "eval_runtime": 14.9638,
      "eval_samples_per_second": 3.475,
      "eval_steps_per_second": 1.738,
      "step": 1
    },
    {
      "epoch": 0.8421052631578947,
      "eval_accuracy": 0.6346153846153846,
      "eval_loss": 5.127074241638184,
      "eval_runtime": 15.5565,
      "eval_samples_per_second": 3.343,
      "eval_steps_per_second": 1.671,
      "step": 4
    },
    {
      "epoch": 1.6842105263157894,
      "eval_accuracy": 0.6538461538461539,
      "eval_loss": 5.060066223144531,
      "eval_runtime": 15.7285,
      "eval_samples_per_second": 3.306,
      "eval_steps_per_second": 1.653,
      "step": 8
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 2.453125,
      "learning_rate": 2e-05,
      "loss": 5.1323,
      "step": 10
    },
    {
      "epoch": 2.526315789473684,
      "eval_accuracy": 0.7884615384615384,
      "eval_loss": 4.774318695068359,
      "eval_runtime": 15.4298,
      "eval_samples_per_second": 3.37,
      "eval_steps_per_second": 1.685,
      "step": 12
    },
    {
      "epoch": 3.3684210526315788,
      "eval_accuracy": 0.9230769230769231,
      "eval_loss": 4.04908561706543,
      "eval_runtime": 15.16,
      "eval_samples_per_second": 3.43,
      "eval_steps_per_second": 1.715,
      "step": 16
    },
    {
      "epoch": 4.2105263157894735,
      "grad_norm": 6.03125,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 4.2735,
      "step": 20
    },
    {
      "epoch": 4.2105263157894735,
      "eval_accuracy": 0.8846153846153846,
      "eval_loss": 2.6443593502044678,
      "eval_runtime": 15.6094,
      "eval_samples_per_second": 3.331,
      "eval_steps_per_second": 1.666,
      "step": 20
    },
    {
      "epoch": 5.052631578947368,
      "eval_accuracy": 0.9615384615384616,
      "eval_loss": 1.0550649166107178,
      "eval_runtime": 15.8572,
      "eval_samples_per_second": 3.279,
      "eval_steps_per_second": 1.64,
      "step": 24
    },
    {
      "epoch": 5.894736842105263,
      "eval_accuracy": 0.6923076923076923,
      "eval_loss": 0.4698377847671509,
      "eval_runtime": 15.837,
      "eval_samples_per_second": 3.283,
      "eval_steps_per_second": 1.642,
      "step": 28
    },
    {
      "epoch": 6.315789473684211,
      "grad_norm": 0.6875,
      "learning_rate": 1.6363636363636366e-05,
      "loss": 1.2232,
      "step": 30
    },
    {
      "epoch": 6.7368421052631575,
      "eval_accuracy": 0.6730769230769231,
      "eval_loss": 0.3224477767944336,
      "eval_runtime": 15.7452,
      "eval_samples_per_second": 3.303,
      "eval_steps_per_second": 1.651,
      "step": 32
    },
    {
      "epoch": 7.578947368421053,
      "eval_accuracy": 1.0,
      "eval_loss": 0.2526957392692566,
      "eval_runtime": 15.7721,
      "eval_samples_per_second": 3.297,
      "eval_steps_per_second": 1.648,
      "step": 36
    }
  ],
  "logging_steps": 10,
  "max_steps": 120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 4,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.185115697997414e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}