Nanobit committed
Commit f1ebaa0 · unverified · 1 Parent(s): 34ba634

chore(config): refactor old mistral config (#1435)

* chore(config): refactor old mistral config

* chore: add link to colab on readme

README.md CHANGED
@@ -32,6 +32,7 @@ Features:
   - [Bare Metal Cloud GPU](#bare-metal-cloud-gpu)
   - [Windows](#windows)
   - [Mac](#mac)
+  - [Google Colab](#google-colab)
   - [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
   - [Dataset](#dataset)
   - [How to Add Custom Prompts](#how-to-add-custom-prompts)
@@ -269,6 +270,10 @@ pip3 install -e '.'
 ```
 More info: [mac.md](/docs/mac.qmd)
 
+#### Google Colab
+
+Please use this example [notebook](examples/colab-notebooks/colab-axolotl-example.ipynb).
+
 #### Launching on public clouds via SkyPilot
 To launch on GPU instances (both on-demand and spot instances) on 7+ clouds (GCP, AWS, Azure, OCI, and more), you can use [SkyPilot](https://skypilot.readthedocs.io/en/latest/index.html):
 
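For orientation, the SkyPilot section this hunk touches boils down to a one-line launch. A minimal sketch, assuming the SkyPilot CLI is installed, `sky check` passes for at least one cloud, and a task file exists at the assumed path `skypilot/axolotl.yaml` (that path is an assumption, not something this commit adds):

```sh
# Assumed setup, not part of this diff: install SkyPilot and verify cloud credentials
pip install skypilot
sky check

# Launch an axolotl training task on the cheapest matching instance;
# "skypilot/axolotl.yaml" is an assumed task-file path in the repo
sky launch -c axolotl skypilot/axolotl.yaml
```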
examples/mistral/Mistral-7b-example/README.md DELETED
@@ -1,12 +0,0 @@
-# Description
-This repository presents an in-depth guide for fine-tuning Mistral-7b or any other compatible model using Axolotl, tailored specifically for chatbot development. It streamlines the process of fine-tuning and uploading the enhanced model to HuggingFace 🤗, thereby serving as an invaluable tool for developers in the AI and chatbot domain.
-
-**What’s Inside:**
-
-Beginner-Friendly Instructions: Comprehensive steps to guide you through fine-tuning your chosen model, including details on the data structure (jsonl), configuration, and the code itself.
-
-Hardware Utilized: For reference, the fine-tuning in this guide was performed using 4x NVIDIA GeForce RTX 3090 (rented 2.1.2-cuda12.1-cudnn8-devel).
-
-**Uploading to HuggingFace 🤗:**
-To upload your fine-tuned model to Hugging Face, include the following files:
-![Screenshot 2024-01-19 213932](https://github.com/OpenAccess-AI-Collective/axolotl/assets/138583191/d660eb84-2d76-46a1-9846-cf0aeb3006d9)
 
examples/mistral/Mistral-7b-example/code.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
examples/mistral/Mistral-7b-example/data.jsonl DELETED
@@ -1,10 +0,0 @@
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: Who is the Founder of Apple\""}, {"from": "gpt", "value": "\"<Chatbot>: The founder of Apple is Steve Jobs\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the capital of France?\""}, {"from": "gpt", "value": "\"<Chatbot>: The capital of France is Paris.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: How far is the Moon from Earth?\""}, {"from": "gpt", "value": "\"<Chatbot>: The Moon is approximately 384,400 kilometers from Earth.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the tallest mountain in the world?\""}, {"from": "gpt", "value": "\"<Chatbot>: The tallest mountain in the world is Mount Everest.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: Who wrote Romeo and Juliet?\""}, {"from": "gpt", "value": "\"<Chatbot>: Romeo and Juliet was written by William Shakespeare.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the boiling point of water?\""}, {"from": "gpt", "value": "\"<Chatbot>: The boiling point of water is 100 degrees Celsius.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: When was the first man on the moon?\""}, {"from": "gpt", "value": "\"<Chatbot>: The first man landed on the moon in 1969.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the largest ocean?\""}, {"from": "gpt", "value": "\"<Chatbot>: The largest ocean is the Pacific Ocean.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: Who invented the telephone?\""}, {"from": "gpt", "value": "\"<Chatbot>: The telephone was invented by Alexander Graham Bell.\""}]}
-{"conversations": [{"from": "Customer", "value": "\"<Customer>: What is the formula for water?\""}, {"from": "gpt", "value": "\"<Chatbot>: The chemical formula for water is H2O.\""}]}
 
examples/mistral/config.yml CHANGED
@@ -56,6 +56,3 @@ weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
 
examples/mistral/{Mistral-7b-example/config.yml → lora.yml} RENAMED
@@ -1,4 +1,3 @@
-#Mistral-7b
 base_model: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
@@ -8,26 +7,32 @@ load_in_4bit: false
 strict: false
 
 datasets:
-  - path: tilemachos/Demo-Dataset #Path to json dataset file in huggingface
-  #for type,conversation arguments read axolotl readme and pick what is suited for your project, I wanted a chatbot and put sharegpt and chatml
-    type: sharegpt
-    conversation: chatml
-dataset_prepared_path: tilemachos/Demo-Dataset #Path to json dataset file in huggingface
-val_set_size: 0.05
-output_dir: ./out
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.1
+output_dir: ./lora-out
 
-#using lora for lower cost
 adapter: lora
-lora_r: 8
+lora_model_dir:
+
+sequence_len: 8192
+sample_packing: true
+pad_to_sequence_len: true
+
+lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
+lora_target_linear: true
+lora_fan_in_fan_out:
 lora_target_modules:
+  - gate_proj
+  - down_proj
+  - up_proj
   - q_proj
   - v_proj
-
-sequence_len: 512
-sample_packing: false
-pad_to_sequence_len: true
+  - k_proj
+  - o_proj
 
 wandb_project:
 wandb_entity:
@@ -35,18 +40,17 @@ wandb_watch:
 wandb_name:
 wandb_log_model:
 
-#only 2 epochs because of small dataset
-gradient_accumulation_steps: 3
+gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs: 2
+num_epochs: 1
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0002
 
 train_on_inputs: false
 group_by_length: false
-bf16: true
-fp16: false
+bf16: auto
+fp16:
 tf32: false
 
 gradient_checkpointing: true
@@ -57,18 +61,17 @@ logging_steps: 1
 xformers_attention:
 flash_attention: true
 
+loss_watchdog_threshold: 5.0
+loss_watchdog_patience: 3
+
 warmup_steps: 10
 evals_per_epoch: 4
 eval_table_size:
 eval_max_new_tokens: 128
 saves_per_epoch: 1
 debug:
-#default deepspeed, can use more aggresive if needed like zero2, zero3
-deepspeed: deepspeed_configs/zero1.json
+deepspeed:
 weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
 
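To make the rename concrete, here is a minimal sketch of consuming the new `examples/mistral/lora.yml` with axolotl's documented CLI entry points (assumes axolotl and its dependencies are already installed):

```sh
# Optional: tokenize and cache the dataset ahead of training
python -m axolotl.cli.preprocess examples/mistral/lora.yml

# Fine-tune Mistral-7B with the consolidated LoRA config
accelerate launch -m axolotl.cli.train examples/mistral/lora.yml
```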
examples/mistral/qlora.yml CHANGED
@@ -75,6 +75,3 @@ weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
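
The `special_tokens` overrides removed here, and in `config.yml` and `lora.yml` above, duplicate values the Mistral tokenizer already defines, which is why dropping them is safe. A quick hypothetical check, assuming `transformers` is installed:

```sh
# Hypothetical sanity check (not part of this commit): the base tokenizer
# already ships <s>, </s>, and <unk>, so the explicit overrides were redundant
python -c "from transformers import AutoTokenizer; \
t = AutoTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1'); \
print(t.bos_token, t.eos_token, t.unk_token)"
```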