greener-13-2-base-8bitC / training_log.json
xyzzy
upload deprecated PEFT adapter model. use for eval only.
a22137f
raw
history blame contribute delete
445 Bytes
{
"base_model_name": "TheBloke_Llama-2-13B-fp16",
"base_model_class": "LlamaForCausalLM",
"base_loaded_in_4bit": false,
"base_loaded_in_8bit": true,
"projections": "q, v",
"loss": 2.5449,
"learning_rate": 0.0001125,
"epoch": 0.91,
"current_steps": 63,
"train_runtime": 3350.024,
"train_samples_per_second": 0.083,
"train_steps_per_second": 0.001,
"total_flos": 2.054433986838528e+16,
"train_loss": 2.506993532180786
}