End of training
- README.md +35 -27
- adapter_model.bin +1 -1

README.md CHANGED
@@ -30,11 +30,30 @@ hf_use_auth_token: true
 hub_model_id: mgfrantz/axolotl-test
 mlflow_experiment_name: axolotl-test
 
-# Data config
-dataset_prepared_path:
+# # Data config
+dataset_prepared_path: data
+# val_set_size: 0.1
 datasets:
-  - path:
-
+  - path: data/train.jsonl
+    ds_type: json
+    data_files:
+      - data/train.jsonl
+    conversation: alpaca
+    type: sharegpt
+    # role:
+    # input:
+    # output:
+
+test_datasets:
+  - path: data/test.jsonl
+    ds_type: json
+    # You need to specify a split. For "json" datasets the default split is called "train".
+    split: train
+    type: sharegpt
+    conversation: alpaca
+    data_files:
+      - data/eval.jsonl
+
 
 # Training config
 debug: null
@@ -46,7 +65,7 @@ flash_attention: true
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 1
 gradient_checkpointing: true
 group_by_length: false
 
@@ -79,7 +98,6 @@ strict: false
 tf32: false
 tokenizer_type: LlamaTokenizer
 train_on_inputs: false
-val_set_size: 0.05
 wandb_entity: null
 wandb_log_model: null
 wandb_name: null
@@ -97,7 +115,7 @@ xformers_attention: null
 
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss:
+- Loss: 3.3823
 
 ## Model description
 
@@ -120,8 +138,6 @@ The following hyperparameters were used during training:
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
@@ -129,26 +145,18 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-
-
-
-
-| 1.3754 | 1.5833 | 5 | 1.4268 |
-| 1.399 | 1.9167 | 6 | 1.4080 |
-| 1.3767 | 2.1667 | 7 | 1.3829 |
-| 1.3624 | 2.5 | 8 | 1.3579 |
-| 1.3157 | 2.8333 | 9 | 1.3376 |
-| 1.2913 | 3.0833 | 10 | 1.3260 |
-| 1.2814 | 3.4167 | 11 | 1.3165 |
-| 1.2778 | 3.75 | 12 | 1.3119 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 5.5347 | 1.0 | 1 | 3.4154 |
+| 6.0917 | 2.0 | 2 | 3.4115 |
+| 5.2981 | 2.0 | 3 | 3.4036 |
+| 7.2093 | 3.0 | 4 | 3.3823 |
 
 
 ### Framework versions
 
-- PEFT 0.13.
-- Transformers 4.45.
+- PEFT 0.13.2
+- Transformers 4.45.2
 - Pytorch 2.4.1+cu121
-- Datasets
-- Tokenizers 0.20.
+- Datasets 3.0.1
+- Tokenizers 0.20.1
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d9b176e0cecd4e2b5dbd00bf7bebc35eaede4916a9c5bb92c5587aedca365b94
 size 101036698
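For reference, a minimal usage sketch (not part of this commit) for the adapter weights updated above: it loads them onto the base model named in the README using the PEFT and Transformers releases listed there. The repository ids come from the diff; the alpaca-style prompt and generation settings are illustrative assumptions.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
adapter_id = "mgfrantz/axolotl-test"  # this repository

# Load the base model and tokenizer, then apply the LoRA adapter
# stored in adapter_model.bin on top of the base weights.
tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base, adapter_id)

# The config renders sharegpt-format data with the alpaca conversation
# template, so an alpaca-style prompt is an assumed, reasonable way to query it.
prompt = "### Instruction:\nSay hello.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```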