diff --git a/docker/Dockerfile-cloud b/docker/Dockerfile-cloud
index 69ce143bb22118284eb5ae3cc68e5601fffb50ee..cc8c58415b25127fb57a2a1b13271e28915f4ae6 100644
--- a/docker/Dockerfile-cloud
+++ b/docker/Dockerfile-cloud
@@ -21,7 +21,9 @@ RUN apt install --yes --no-install-recommends openssh-server tmux && \
     printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
     printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
     chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
-    chmod +x /root/cloud-entrypoint.sh
+    chmod +x /root/cloud-entrypoint.sh && \
+    mkdir -p /workspace/data/axolotl-artifacts && \
+    ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs

 ENTRYPOINT ["/root/cloud-entrypoint.sh"]
 CMD ["sleep", "infinity"]
diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml
index 18dd86e6b432ebc497a44cb3cfd1b3088a8970ec..ba4e65daaef2149c3269d85a8eb3ef3ecb7a14ff 100644
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -38,7 +38,7 @@ wandb_watch:
 wandb_name:
 wandb_log_model:

-output_dir: btlm-out
+output_dir: ./outputs/btlm-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index c4f44326c2bebeb84d086447d7cdc1715375e36b..285607a4c81aff6141d3cca982c6810f48e52d35 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 batch_size: 4
 micro_batch_size: 4
 num_epochs: 2
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index ce5a892d08d4df962070bdb07533c08a2444f5ac..0ba96cfaa7410a004d3afd043c87b42c27682a4a 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index d822e6847068b2400c65409d41d29d667971732f..787862d0102b944023fcf12b6aa14da9eee701a3 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index dfef2538b0ce3d618b7b484990f53cd5d7166bf5..92d4c544a3fedb426f567a3d4bb634c8ef96108d 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 77f821e1c830787c496deeda07eab22c04d5c806..93a6de877778e51b9a20c986d43fe3575e17155d 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index 3e6c7fe620af2b513f2fbc1372cc19ba0fd907cd..d13f505325d30efa186c70b635a1eef5233e914f 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index e817b113cc0e63941533b79630d4fde60369b942..a1026a982d04e37d56900db0848fcd627d202987 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb
index 9adbe000476592250808b229a3df3eb32ef4e630..fc3b76194961ec170798018369cb149f5c8fa3cc 100644
--- a/examples/colab-notebooks/colab-axolotl-example.ipynb
+++ b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -84,7 +84,7 @@
     "    type: alpaca\n",
     "dataset_prepared_path:\n",
     "val_set_size: 0.05\n",
-    "output_dir: ./qlora-out\n",
+    "output_dir: ./outputs/qlora-out\n",
     "\n",
     "adapter: qlora\n",
     "lora_model_dir:\n",
diff --git a/examples/dbrx/16bit-lora.yaml b/examples/dbrx/16bit-lora.yaml
index e5e3ea9216e06e1cce718f1487d2a8fc6ee7b845..32b625ac6965b9accdd764e6b22143b8adbedfcb 100644
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false
diff --git a/examples/dbrx/8bit-lora.yaml b/examples/dbrx/8bit-lora.yaml
index 89e24db05800e3a4d886fc4bedeaa715227998fe..50ee0a016411dfdf2d36ac4bd828a54c4be660d5 100644
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false
diff --git a/examples/dbrx/fft-ds-zero3.yaml b/examples/dbrx/fft-ds-zero3.yaml
index 68292707a4092805b2f4980e0a4b3bc4b0b1cead..60dc201eee03bbe9a72131f63ff234c17afaa127 100644
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index 5be9c6425326a5e651680f410a125303449dd08c..029ca40e0902377b1a6d019b3363c28be26dcb33 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index eb1cdfcdba27a2e993446fc11e6922d1d8786369..4e34144ed6dc148d08c7f2ecc4f5fb07cbe187b1 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -42,7 +42,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 # QLoRA paper Table 9
 # - 16 for 7b & 13b
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 1dd46a93ff217faabcb5be37eeaa6a6489149133..36264f063e3aa96f7d5263a2df5c84ad8cc6692a 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml
index 619a4012911c4b0537100de2a5cc9a1791aeb5dd..e08facfc5d4541efdb779a2cfb1dc6949b115505 100644
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -12,7 +12,7 @@ datasets:
   - path: mhenrichsen/alpaca_2k_test
     type: alpaca
 val_set_size: 0.1
-output_dir: ./out
+output_dir: ./outputs/out

 adapter: qlora
 lora_r: 32
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index cd3f2e2ad78b3d4f75898651126db420443a5547..f801729fac9833ea69e70e45319b17565fa6d7d8 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 2
 micro_batch_size: 2
 num_epochs: 2
diff --git a/examples/jamba/qlora.yaml b/examples/jamba/qlora.yaml
index 41a3854fe1b01577a1404d53ce3eaffb981890cc..3d6f69e793c8367daed54fec7287ca16aec23068 100644
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: false
diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml
index ef04fb53fec5e9f82d78622950059390a3dc185d..43a76c00b106391ba5f6ac9e94365bfaf98c1218 100644
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: false
diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml
index a672c7b94f46301bb6226d1ca05318681a87cff5..088629c0840aa17641e3924ae4bf79b6204ad46e 100644
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -21,7 +21,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./jeopardy-bot-7b
+output_dir: ./outputs/jeopardy-bot-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml
index 74edc95e6bcee7b8792590e40f68f4bc8d2a6f2c..3d94b04b8b7029ac8f6ca8ac4cce925a8afc6913 100644
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index 68ca9ed31c6c5b76d4e319aee7a7267064219f02..2a706265bdde1be7e96047593074f01ff3116766 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -33,7 +33,7 @@ wandb_project:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./model-out
+output_dir: ./outputs/model-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml
index e692c7ac1e56f8ff7ddc33444fbf43451760e81a..7012d1f6131215eebad688762f8811c1183553d3 100644
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./lisa-out
+output_dir: ./outputs/lisa-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml
index 4529a912dc3a14fa319a5bc2c552219551715818..68d9ac01421e1c841b941b217889b914d95da8de 100644
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index a7793dce4cbe5fcfa314ad1595db7cc84adcc5b5..95bfae6920b4f9734d8bffb433ef6435097cb724 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml
index 93b3b2a60ad2068b8fcb516eaf1155ebed13da07..88029f92d5bcb2c02ede8b0fe8df2ae451803a1f 100644
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index 834dbfb33a65dcefc1e8298d74a35bf75a6eafb8..dda32170bd812028cf377e15c7a36323cafe0ca1 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 9fd19953c60190e71cc8326ec52405f26b6b9080..93247ce068abf39db2e1de2fb3d79c82f1ea3f37 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./relora-out
+output_dir: ./outputs/relora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml
index 8c9ba90bfe89bfbd9cc7f0db4173029ba5b2b802..a36fd740e417627859a26136ef4cb3cd94f8552e 100644
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8192
 sample_packing: true
diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml
index d60f8a303549913940a91ecbbbfc703abacaae0d..6b0ebaed86cdcc9079f64798d648aa7a37529ac2 100644
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml
index 8d8785bfd588590ec307a33ac16adc260fad5b44..9b74f6b4de687b9c0e859d1f05f1e169c15421ab 100644
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out/qlora-llama3-70b
+output_dir: ./outputs/out/qlora-llama3-70b

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml
index 9cedee8eec208d19286a286c875ec4e0bd7b8b9b..44120d9385db8da0d7d67c96ab689726e06ccd95 100644
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml
index 0a5223bcac7dd5cbe505522696e2b38aa3e81be1..f88f5138d916055fe3b4e47fd39f575d5c151771 100644
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 2048
 sample_packing: false
diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml
index cc0a44b2a404c0cd9df67fedacc830a02f50139b..e993e44a7862be45ffd1a173f17e07a7aa2a59b7 100644
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -23,7 +23,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index c909c63e2213b0916a449c554eb4b60943823107..a70937c4fd714b88edb5cee3883aab2f34be43b0 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8192
 sample_packing: true
diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml
index 31b0d527e2bb0379256d3153f0ea19e90b59534c..03c74bb59bbca7c6eb062c68b92ad9b704fff108 100644
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 eval_sample_packing: false

 adapter: lora
diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml
index ac9ac0dd982ff835144d3fa6f1cd5afb094341e3..0d5dc9edd7c9c76adb5aa05d67832e0f9abb6d47 100644
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 adapter: lora
 lora_model_dir:
diff --git a/examples/mistral/mistral-qlora-fsdp.yml b/examples/mistral/mistral-qlora-fsdp.yml
index 71ac1e701f907ab774fe573f0c712bde6bee34e1..e6b07c594b1e4f2ec3fecf962a9e0c32407f3eee 100644
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml
index 7727fd7485c83e68c4690a1f4b7209e1079a128b..2549ef018c6e065597e80030a0dc7f05d62ed51c 100644
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -16,7 +16,7 @@ datasets:
     type: chat_template.argilla
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./mistral-qlora-orpo-out
+output_dir: ./outputs/mistral-qlora-orpo-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
index ac80a2a7561f46537e4b9e8a18dc7a42d656a696..fe68b28172901ce8615174da984df916bdab2cee 100644
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml
index b6a07ae51cc3df2d90f82187f936a26426fa803e..c0959704027713d5dde9901e376538df17223c2e 100644
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml
index 5ee3da9d65981de3185559cdbbf6daa056c6dba8..13fbe92ab8d7fd4d96444835728c0928d95cfcbe 100644
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 ## You can optionally freeze the entire model and unfreeze a subset of parameters
 unfrozen_parameters:
diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml
index 9abb6f407a8c9c2186e6a22d72108439e6933aa5..9a1e86386c28963902c1bfb2eb405d4530ae3244 100644
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -21,7 +21,7 @@ model_config:
 datasets:
   - path: yahma/alpaca-cleaned
     type: alpaca
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8000
 sample_packing: true
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 6fbbb96183d1a3382f663d2bf8c9ba7eb5e0d7eb..c7bdb155c0ec69afc0b7147e49d4ca30c141853e 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 45e31266f1a73057c2f4cc1aabe7b01749a8bc5e..530415de1703ab68ff64451a0c6caf109e4d46f1 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./mpt-alpaca-7b
+output_dir: ./outputs/mpt-alpaca-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index 0a404c79d85114359412622dbc642117a5fab7f7..a0473213c033efe5e858fe278177b7caa3310cda 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./openllama-out
+output_dir: ./outputs/openllama-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index b83b2db4e4aa03ffeb4b563fa82092cf72be960b..2b67849159904e990c881285256837e89bb4fa7d 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -31,7 +31,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index 3d6218b308a3133b35a1507a04360f8ce094a204..8d4dc05ca7594e6f561cf192f03f092662610b76 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index b21386f7077c10f78e5062a41f4e394a2ff85dac..0dabadc7a4e68788d060ad6ba1e174eef22a7f5f 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -12,7 +12,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index d2b5d661c9cf6d6a403883b86a54a6a7017234b2..7c181a3c15968cad2d0df2c42132f3a55b8d1b17 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -12,7 +12,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml
index 7a2d05d0189e7f2f0da4d51ec672318d287a37fc..27815550b4c8a4d37a07c47c1ac30102693a2d09 100644
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -12,7 +12,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml
index e44bba74512ae7be47acbe1fef3b47f3ad2e8af4..18e6beaafd2ec0405de2ae68847430063e816396 100644
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -26,7 +26,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./pythia-12b
+output_dir: ./outputs/pythia-12b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 5
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index 7cb07fe2583dc0bcf16ab6fc4975c7b71e00f524..0aa650f67e807e1fe06f7fcc9d40cc409c073d5f 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -20,7 +20,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-alpaca-pythia
+output_dir: ./outputs/lora-alpaca-pythia
 gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 4
diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml
index da4d784e0a0925c8968337ce905d8352c3c2c91f..dd8dc1e4f4e9452874b1ae738247e7a45e2f9085 100644
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -13,7 +13,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 2048  # supports up to 8192
 sample_packing: false
diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml
index 501a866b2d872adc94c09b3c864fd31c3e5bb986..01c0c0ab864ac424630a811a87d5d62665e5c205 100644
--- a/examples/qwen/qlora.yml
+++ b/examples/qwen/qlora.yml
@@ -13,7 +13,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 2048  # supports up to 8192
 sample_packing: false
diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml
index c59b282d0add721555168290d75bfc4eb4315af4..452335e38f1bc81fc2e1e1de0a0ee19480f8be9d 100644
--- a/examples/qwen/qwen2-moe-lora.yaml
+++ b/examples/qwen/qwen2-moe-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 1024  # supports up to 32k
 sample_packing: false
diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml
index d6a835a0a30bd4d7d1bd70d12187def02e47ec43..bc11007c78777bf11fc28bfe733752470e4bf046 100644
--- a/examples/qwen/qwen2-moe-qlora.yaml
+++ b/examples/qwen/qwen2-moe-qlora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 1024  # supports up to 32k
 sample_packing: false
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index 5a42e2a9520110882a9952cd7a6bfe68185f79d7..ff395a863df28927fbbf4c86394cd9c8cc30ec3f 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -24,7 +24,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./redpajama-alpaca-3b
+output_dir: ./outputs/redpajama-alpaca-3b
 batch_size: 4
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml
index bdfe1bd854bfcfcca571259c574e7088500c4cfb..9fee099d474f09acf553ca85e0d9de54461ad996 100644
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-replit
+output_dir: ./outputs/lora-replit
 batch_size: 8
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml
index f3fc16f867bcc11d707820778d3bb500d3a2aed2..777262a7ee43de7cb990d276ec032d8b47960620 100644
--- a/examples/stablelm-2/1.6b/fft.yml
+++ b/examples/stablelm-2/1.6b/fft.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml
index c5051fab6e6e24134f01ee56b48b742c6e95995a..c65b9e4cd0617afb898bb69533ddc7c7778c4b0b 100644
--- a/examples/stablelm-2/1.6b/lora.yml
+++ b/examples/stablelm-2/1.6b/lora.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml
index 1efdfbc8e0905988936ef59c502c162ac8b2aae1..83fc0d89f76ba3c842f5011eee1d3d2c13d800ed 100644
--- a/examples/starcoder2/qlora.yml
+++ b/examples/starcoder2/qlora.yml
@@ -11,7 +11,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.2
-output_dir: ./qlora
+output_dir: ./outputs/qlora

 adapter: qlora
 lora_model_dir:
diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml
index fd7b02cacac16eaf2cabbac675adb38d38cceca7..c08be82d3b95e328b3dd03c5ad84011dc9547944 100644
--- a/examples/tiny-llama/lora-mps.yml
+++ b/examples/tiny-llama/lora-mps.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml
index 4a16f14b92d794d26a41be7e88ca1fe760c9c0a5..c5ff0437e8cc6dd892c3fa06a99f21a0f79abcf8 100644
--- a/examples/tiny-llama/lora.yml
+++ b/examples/tiny-llama/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml
index 3b68a7f5477bfc96b3e79f86f5b9d4063b18f4d3..e501dcb8e590f7b156bf022b5ad2862ff33d3b5c 100644
--- a/examples/tiny-llama/pretrain.yml
+++ b/examples/tiny-llama/pretrain.yml
@@ -14,7 +14,7 @@ pretraining_dataset:
     type: pretrain
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./model-out
+output_dir: ./outputs/model-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml
index 3ea313c838cf69e930dcbe82e268efce049ffda3..0d21aca9d5fe824891b1097d7dccdb8d63551c23 100644
--- a/examples/tiny-llama/qlora.yml
+++ b/examples/tiny-llama/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index e3faa01bdb79dd97abd816d2c8f9fd6b8bc3675c..7e3f83cbd7b2318c15d1c5ec904190ff5be09933 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -40,7 +40,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 # QLoRA paper Table 9
 # - 16 for 7b & 13b
diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml
index dc8c37d18796a13d17bda1aa2974224d2a0af15a..7fe322d63dc9840c251f46136963902d556df0e3 100644
--- a/examples/yi-34B-chat/qlora.yml
+++ b/examples/yi-34B-chat/qlora.yml
@@ -33,7 +33,7 @@ eval_sample_packing: false
 eval_batch_size: 1

 # LoRA
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 adapter: qlora
 lora_model_dir:
 lora_r: 32
diff --git a/outputs/.gitignore b/outputs/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..72e8ffc0db8aad71a934dd11e5968bd5109e54b4
--- /dev/null
+++ b/outputs/.gitignore
@@ -0,0 +1 @@
+*