End of training
- README.md +12 -25
- adapter_model.bin +1 -1
README.md
CHANGED
@@ -22,6 +22,7 @@ axolotl version: `0.4.1`
 # Model config
 adapter: qlora
 base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
+# base_model: meta-llama/Llama-3.2-3B
 bf16: auto
 
 # HF hub config (push to huggingface)
@@ -32,7 +33,6 @@ mlflow_experiment_name: axolotl-test
 
 # # Data config
 dataset_prepared_path: data
-# val_set_size: 0.1
 chat_template: chatml
 datasets:
   - path: data/train.jsonl
@@ -41,9 +41,6 @@ datasets:
       - data/train.jsonl
    conversation: alpaca
    type: sharegpt
-    # role:
-    # input:
-    # output:
 
 test_datasets:
   - path: data/eval.jsonl
@@ -66,7 +63,7 @@ flash_attention: true
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 8
 gradient_checkpointing: true
 group_by_length: false
 
@@ -116,7 +113,7 @@ xformers_attention: null
 
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss:
+- Loss: 2.4338
 
 ## Model description
 
@@ -139,6 +136,8 @@ The following hyperparameters were used during training:
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
@@ -146,30 +145,18 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-
-
-| 4.
-
-| 5.1317 | 1.25 | 5 | 2.0230 |
-| 5.5762 | 1.25 | 6 | 1.9738 |
-| 3.3504 | 1.5 | 7 | 1.9053 |
-| 5.1877 | 1.75 | 8 | 1.8346 |
-| 3.8815 | 2.0 | 9 | 1.7862 |
-| 3.5814 | 2.25 | 10 | 1.7475 |
-| 3.3579 | 2.25 | 11 | 1.6987 |
-| 3.5511 | 2.5 | 12 | 1.6555 |
-| 3.3339 | 2.75 | 13 | 1.6107 |
-| 2.8774 | 3.0 | 14 | 1.5778 |
-| 3.1427 | 3.25 | 15 | 1.5620 |
-| 3.3465 | 3.25 | 16 | 1.5572 |
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 3.4962        | 0.5714 | 1    | 2.4779          |
+| 5.3564        | 1.0714 | 2    | 2.4760          |
+| 4.3272        | 1.6429 | 3    | 2.4633          |
+| 4.7348        | 2.1429 | 4    | 2.4338          |
 
 
 ### Framework versions
 
 - PEFT 0.13.2
 - Transformers 4.45.2
-- Pytorch 2.4.
+- Pytorch 2.4.0+cu121
 - Datasets 3.0.1
 - Tokenizers 0.20.1
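The data hunks above point axolotl at `data/train.jsonl` with `type: sharegpt`, `conversation: alpaca`, and `chat_template: chatml`, but the training data itself is not part of this commit. As a rough sketch only (the turns below are invented, and the exact ShareGPT variant expected by axolotl 0.4.1 may differ), a record in that file would look something like this when written from Python:

```python
import json

# Hypothetical ShareGPT-style record for data/train.jsonl.
# The "conversations" list of "from"/"value" turns is the usual ShareGPT layout;
# the real turns in this repo's dataset are not shown anywhere in the diff.
record = {
    "conversations": [
        {"from": "human", "value": "Summarize what this commit changes."},
        {"from": "gpt", "value": "It fills in the QLoRA training results and uploads a new adapter checkpoint."},
    ]
}

with open("data/train.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(record, ensure_ascii=False) + "\n")
```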
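Because the commit ships a PEFT/QLoRA adapter rather than full model weights, using the model means attaching `adapter_model.bin` to the TinyLlama base listed in the card. A minimal loading sketch against the Transformers/PEFT versions above, with `your-username/your-adapter-repo` as a placeholder for this repo's actual Hub id (which the diff does not show):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
ADAPTER = "your-username/your-adapter-repo"  # placeholder: actual repo id not shown in this diff

tokenizer = AutoTokenizer.from_pretrained(BASE)
base_model = AutoModelForCausalLM.from_pretrained(BASE, torch_dtype="auto")

# Attach the trained QLoRA adapter weights on top of the frozen base model.
model = PeftModel.from_pretrained(base_model, ADAPTER)
model.eval()
```

Since the config sets `chat_template: chatml`, inference prompts should normally be rendered with the same ChatML template that was used during training.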
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:64d2cce8324e410604bb157b921b744134968d639973d938a5b47f1146461b05
 size 101036698
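`adapter_model.bin` is stored through Git LFS, so the lines above are a pointer file: the `oid sha256:` and `size` fields describe the real binary that LFS serves. A small integrity check, assuming the resolved file (not the pointer) has been downloaded to the working directory:

```python
import hashlib
from pathlib import Path

# Values taken from the new LFS pointer in this commit.
EXPECTED_SHA256 = "64d2cce8324e410604bb157b921b744134968d639973d938a5b47f1146461b05"
EXPECTED_SIZE = 101036698

path = Path("adapter_model.bin")

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

print("size matches:  ", path.stat().st_size == EXPECTED_SIZE)
print("sha256 matches:", digest.hexdigest() == EXPECTED_SHA256)
```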