mgfrantz committed (verified)
Commit 216bc82 · 1 Parent(s): b5a601d

End of training

Files changed (2)
  1. README.md +35 -27
  2. adapter_model.bin +1 -1
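
For reference, both files touched by this commit can be pulled from the Hub with `huggingface_hub`; a minimal sketch, assuming public read access and pinning to this commit's revision:

```python
from huggingface_hub import hf_hub_download

# Fetch the two files listed above, pinned to this commit (216bc82).
readme_path = hf_hub_download(
    repo_id="mgfrantz/axolotl-test", filename="README.md", revision="216bc82"
)
adapter_path = hf_hub_download(
    repo_id="mgfrantz/axolotl-test", filename="adapter_model.bin", revision="216bc82"
)
print(readme_path, adapter_path)
```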
README.md CHANGED
@@ -30,11 +30,30 @@ hf_use_auth_token: true
 hub_model_id: mgfrantz/axolotl-test
 mlflow_experiment_name: axolotl-test
 
-# Data config
-dataset_prepared_path: null
+# # Data config
+dataset_prepared_path: data
+# val_set_size: 0.1
 datasets:
-  - path: mhenrichsen/alpaca_2k_test
-    type: alpaca
+  - path: data/train.jsonl
+    ds_type: json
+    data_files:
+      - data/train.jsonl
+    conversation: alpaca
+    type: sharegpt
+    # role:
+    # input:
+    # output:
+
+test_datasets:
+  - path: data/test.jsonl
+    ds_type: json
+    # You need to specify a split. For "json" datasets the default split is called "train".
+    split: train
+    type: sharegpt
+    conversation: alpaca
+    data_files:
+      - data/eval.jsonl
+
 
 # Training config
 debug: null
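
The rewritten data config points `datasets`/`test_datasets` at local JSONL files with `type: sharegpt` and `conversation: alpaca`. As a rough sketch of what one record in `data/train.jsonl` could look like under that loader (the field layout follows the common sharegpt convention and the example text is invented, so treat it as an assumption rather than this repository's actual data):

```python
import json
import os

# One hypothetical sharegpt-style record: a "conversations" list of turns,
# each turn tagged with a "from" role ("human" or "gpt") and a "value" string.
record = {
    "conversations": [
        {"from": "human", "value": "Give me one sentence about LoRA fine-tuning."},
        {"from": "gpt", "value": "LoRA fine-tuning trains small low-rank weight updates on top of a frozen base model."},
    ]
}

# Write it as one line of JSONL, matching `path: data/train.jsonl` above.
os.makedirs("data", exist_ok=True)
with open("data/train.jsonl", "w", encoding="utf-8") as f:
    f.write(json.dumps(record) + "\n")
```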
@@ -46,7 +65,7 @@ flash_attention: true
 fp16: null
 fsdp: null
 fsdp_config: null
-gradient_accumulation_steps: 4
+gradient_accumulation_steps: 1
 gradient_checkpointing: true
 group_by_length: false
 
@@ -79,7 +98,6 @@ strict: false
 tf32: false
 tokenizer_type: LlamaTokenizer
 train_on_inputs: false
-val_set_size: 0.05
 wandb_entity: null
 wandb_log_model: null
 wandb_name: null
@@ -97,7 +115,7 @@ xformers_attention: null
 
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.3119
+- Loss: 3.3823
 
 ## Model description
 
@@ -120,8 +138,6 @@ The following hyperparameters were used during training:
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
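
The disappearance of `total_train_batch_size: 32` from the bullets above lines up with `gradient_accumulation_steps` dropping from 4 to 1 earlier in the diff: the effective per-step batch size is the per-device batch size times the accumulation steps (times the device count). A quick sketch of that arithmetic, assuming a single device since the README does not state the device count:

```python
# Effective batch size = per-device batch size * gradient accumulation steps * devices.
train_batch_size = 8   # from the hyperparameters above
num_devices = 1        # assumption; not stated in the README

print("before:", train_batch_size * 4 * num_devices)  # 32 (gradient_accumulation_steps: 4)
print("after: ", train_batch_size * 1 * num_devices)  # 8  (gradient_accumulation_steps: 1)
```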
@@ -129,26 +145,18 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 1.4095 | 0.3333 | 1 | 1.4451 |
-| 1.4009 | 0.6667 | 2 | 1.4447 |
-| 1.4018 | 1.0 | 3 | 1.4426 |
-| 1.3928 | 1.25 | 4 | 1.4369 |
-| 1.3754 | 1.5833 | 5 | 1.4268 |
-| 1.399 | 1.9167 | 6 | 1.4080 |
-| 1.3767 | 2.1667 | 7 | 1.3829 |
-| 1.3624 | 2.5 | 8 | 1.3579 |
-| 1.3157 | 2.8333 | 9 | 1.3376 |
-| 1.2913 | 3.0833 | 10 | 1.3260 |
-| 1.2814 | 3.4167 | 11 | 1.3165 |
-| 1.2778 | 3.75 | 12 | 1.3119 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 5.5347 | 1.0 | 1 | 3.4154 |
+| 6.0917 | 2.0 | 2 | 3.4115 |
+| 5.2981 | 2.0 | 3 | 3.4036 |
+| 7.2093 | 3.0 | 4 | 3.3823 |
 
 
 ### Framework versions
 
-- PEFT 0.13.0
-- Transformers 4.45.0
+- PEFT 0.13.2
+- Transformers 4.45.2
 - Pytorch 2.4.1+cu121
-- Datasets 2.21.0
-- Tokenizers 0.20.0
+- Datasets 3.0.1
+- Tokenizers 0.20.1
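
Given the framework versions above (PEFT 0.13.2, Transformers 4.45.2), a minimal sketch of loading this adapter on top of its TinyLlama base for inference; this is an illustrative usage pattern, not a snippet taken from the repository:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_id = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"

# Load the frozen base model and attach the LoRA adapter from this repo.
tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base, "mgfrantz/axolotl-test")

inputs = tokenizer("Below is an instruction. Write a response.\n", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```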
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0980833a4632cbba047ede4dedeb0917b87fd0d2f4cc18e618ea354f41b0814f
+oid sha256:d9b176e0cecd4e2b5dbd00bf7bebc35eaede4916a9c5bb92c5587aedca365b94
 size 101036698
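
The only change to `adapter_model.bin` is the LFS pointer's `oid`; the recorded size is unchanged. A small sketch for checking a locally downloaded copy of the adapter against the new pointer:

```python
import hashlib

# sha256 recorded in the updated Git LFS pointer above.
expected = "d9b176e0cecd4e2b5dbd00bf7bebc35eaede4916a9c5bb92c5587aedca365b94"

h = hashlib.sha256()
with open("adapter_model.bin", "rb") as f:  # path to a local copy (assumption)
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(h.hexdigest() == expected)  # True if the local file matches this commit's pointer
```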