{ "best_metric": 5.127074241638184, "best_model_checkpoint": "./outputs/llama2-13B-lora-QuArch_0_1_1_alpaca_filtered-answer-context-test-new/checkpoint-4", "epoch": 10.105263157894736, "eval_steps": 4, "global_step": 48, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21052631578947367, "eval_accuracy": 0.6153846153846154, "eval_loss": 5.132209300994873, "eval_runtime": 14.9638, "eval_samples_per_second": 3.475, "eval_steps_per_second": 1.738, "step": 1 }, { "epoch": 0.8421052631578947, "eval_accuracy": 0.6346153846153846, "eval_loss": 5.127074241638184, "eval_runtime": 15.5565, "eval_samples_per_second": 3.343, "eval_steps_per_second": 1.671, "step": 4 }, { "epoch": 1.6842105263157894, "eval_accuracy": 0.6538461538461539, "eval_loss": 5.060066223144531, "eval_runtime": 15.7285, "eval_samples_per_second": 3.306, "eval_steps_per_second": 1.653, "step": 8 }, { "epoch": 2.1052631578947367, "grad_norm": 2.453125, "learning_rate": 2e-05, "loss": 5.1323, "step": 10 }, { "epoch": 2.526315789473684, "eval_accuracy": 0.7884615384615384, "eval_loss": 4.774318695068359, "eval_runtime": 15.4298, "eval_samples_per_second": 3.37, "eval_steps_per_second": 1.685, "step": 12 }, { "epoch": 3.3684210526315788, "eval_accuracy": 0.9230769230769231, "eval_loss": 4.04908561706543, "eval_runtime": 15.16, "eval_samples_per_second": 3.43, "eval_steps_per_second": 1.715, "step": 16 }, { "epoch": 4.2105263157894735, "grad_norm": 6.03125, "learning_rate": 1.8181818181818182e-05, "loss": 4.2735, "step": 20 }, { "epoch": 4.2105263157894735, "eval_accuracy": 0.8846153846153846, "eval_loss": 2.6443593502044678, "eval_runtime": 15.6094, "eval_samples_per_second": 3.331, "eval_steps_per_second": 1.666, "step": 20 }, { "epoch": 5.052631578947368, "eval_accuracy": 0.9615384615384616, "eval_loss": 1.0550649166107178, "eval_runtime": 15.8572, "eval_samples_per_second": 3.279, "eval_steps_per_second": 1.64, "step": 24 }, { "epoch": 5.894736842105263, "eval_accuracy": 0.6923076923076923, "eval_loss": 0.4698377847671509, "eval_runtime": 15.837, "eval_samples_per_second": 3.283, "eval_steps_per_second": 1.642, "step": 28 }, { "epoch": 6.315789473684211, "grad_norm": 0.6875, "learning_rate": 1.6363636363636366e-05, "loss": 1.2232, "step": 30 }, { "epoch": 6.7368421052631575, "eval_accuracy": 0.6730769230769231, "eval_loss": 0.3224477767944336, "eval_runtime": 15.7452, "eval_samples_per_second": 3.303, "eval_steps_per_second": 1.651, "step": 32 }, { "epoch": 7.578947368421053, "eval_accuracy": 1.0, "eval_loss": 0.2526957392692566, "eval_runtime": 15.7721, "eval_samples_per_second": 3.297, "eval_steps_per_second": 1.648, "step": 36 }, { "epoch": 8.421052631578947, "grad_norm": 0.388671875, "learning_rate": 1.4545454545454546e-05, "loss": 0.3083, "step": 40 }, { "epoch": 8.421052631578947, "eval_accuracy": 1.0, "eval_loss": 0.1972220242023468, "eval_runtime": 15.1578, "eval_samples_per_second": 3.431, "eval_steps_per_second": 1.715, "step": 40 }, { "epoch": 9.263157894736842, "eval_accuracy": 0.9615384615384616, "eval_loss": 0.13723143935203552, "eval_runtime": 16.0784, "eval_samples_per_second": 3.234, "eval_steps_per_second": 1.617, "step": 44 }, { "epoch": 10.105263157894736, "eval_accuracy": 1.0, "eval_loss": 0.0802871510386467, "eval_runtime": 15.6064, "eval_samples_per_second": 3.332, "eval_steps_per_second": 1.666, "step": 48 } ], "logging_steps": 10, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.224682093066322e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }