diff --git a/docker/Dockerfile-cloud b/docker/Dockerfile-cloud
index 69ce143bb22118284eb5ae3cc68e5601fffb50ee..cc8c58415b25127fb57a2a1b13271e28915f4ae6 100644
--- a/docker/Dockerfile-cloud
+++ b/docker/Dockerfile-cloud
@@ -21,7 +21,9 @@ RUN apt install --yes --no-install-recommends openssh-server tmux && \
     printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
     printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
     chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
-    chmod +x /root/cloud-entrypoint.sh
+    chmod +x /root/cloud-entrypoint.sh && \
+    mkdir -p /workspace/data/axolotl-artifacts && \
+    ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs

 ENTRYPOINT ["/root/cloud-entrypoint.sh"]
 CMD ["sleep", "infinity"]
diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml
index 18dd86e6b432ebc497a44cb3cfd1b3088a8970ec..ba4e65daaef2149c3269d85a8eb3ef3ecb7a14ff 100644
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -38,7 +38,7 @@ wandb_watch:
 wandb_name:
 wandb_log_model:

-output_dir: btlm-out
+output_dir: ./outputs/btlm-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index c4f44326c2bebeb84d086447d7cdc1715375e36b..285607a4c81aff6141d3cca982c6810f48e52d35 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 batch_size: 4
 micro_batch_size: 4
 num_epochs: 2
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index ce5a892d08d4df962070bdb07533c08a2444f5ac..0ba96cfaa7410a004d3afd043c87b42c27682a4a 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index d822e6847068b2400c65409d41d29d667971732f..787862d0102b944023fcf12b6aa14da9eee701a3 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index dfef2538b0ce3d618b7b484990f53cd5d7166bf5..92d4c544a3fedb426f567a3d4bb634c8ef96108d 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 77f821e1c830787c496deeda07eab22c04d5c806..93a6de877778e51b9a20c986d43fe3575e17155d 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index 3e6c7fe620af2b513f2fbc1372cc19ba0fd907cd..d13f505325d30efa186c70b635a1eef5233e914f 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index e817b113cc0e63941533b79630d4fde60369b942..a1026a982d04e37d56900db0848fcd627d202987 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb
index 9adbe000476592250808b229a3df3eb32ef4e630..fc3b76194961ec170798018369cb149f5c8fa3cc 100644
--- a/examples/colab-notebooks/colab-axolotl-example.ipynb
+++ b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -84,7 +84,7 @@
     "    type: alpaca\n",
     "dataset_prepared_path:\n",
     "val_set_size: 0.05\n",
-    "output_dir: ./qlora-out\n",
+    "output_dir: ./outputs/qlora-out\n",
     "\n",
     "adapter: qlora\n",
     "lora_model_dir:\n",
diff --git a/examples/dbrx/16bit-lora.yaml b/examples/dbrx/16bit-lora.yaml
index e5e3ea9216e06e1cce718f1487d2a8fc6ee7b845..32b625ac6965b9accdd764e6b22143b8adbedfcb 100644
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false
diff --git a/examples/dbrx/8bit-lora.yaml b/examples/dbrx/8bit-lora.yaml
index 89e24db05800e3a4d886fc4bedeaa715227998fe..50ee0a016411dfdf2d36ac4bd828a54c4be660d5 100644
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false
diff --git a/examples/dbrx/fft-ds-zero3.yaml b/examples/dbrx/fft-ds-zero3.yaml
index 68292707a4092805b2f4980e0a4b3bc4b0b1cead..60dc201eee03bbe9a72131f63ff234c17afaa127 100644
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 512
 sample_packing: false
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index 5be9c6425326a5e651680f410a125303449dd08c..029ca40e0902377b1a6d019b3363c28be26dcb33 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index eb1cdfcdba27a2e993446fc11e6922d1d8786369..4e34144ed6dc148d08c7f2ecc4f5fb07cbe187b1 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -42,7 +42,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 # QLoRA paper Table 9
 # - 16 for 7b & 13b
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 1dd46a93ff217faabcb5be37eeaa6a6489149133..36264f063e3aa96f7d5263a2df5c84ad8cc6692a 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -28,7 +28,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./falcon-7b
+output_dir: ./outputs/falcon-7b
 batch_size: 2
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml
index 619a4012911c4b0537100de2a5cc9a1791aeb5dd..e08facfc5d4541efdb779a2cfb1dc6949b115505 100644
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -12,7 +12,7 @@ datasets:
   - path: mhenrichsen/alpaca_2k_test
     type: alpaca
 val_set_size: 0.1
-output_dir: ./out
+output_dir: ./outputs/out

 adapter: qlora
 lora_r: 32
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index cd3f2e2ad78b3d4f75898651126db420443a5547..f801729fac9833ea69e70e45319b17565fa6d7d8 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 2
 micro_batch_size: 2
 num_epochs: 2
diff --git a/examples/jamba/qlora.yaml b/examples/jamba/qlora.yaml
index 41a3854fe1b01577a1404d53ce3eaffb981890cc..3d6f69e793c8367daed54fec7287ca16aec23068 100644
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: false
diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml
index ef04fb53fec5e9f82d78622950059390a3dc185d..43a76c00b106391ba5f6ac9e94365bfaf98c1218 100644
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: false
diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml
index a672c7b94f46301bb6226d1ca05318681a87cff5..088629c0840aa17641e3924ae4bf79b6204ad46e 100644
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -21,7 +21,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./jeopardy-bot-7b
+output_dir: ./outputs/jeopardy-bot-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml
index 74edc95e6bcee7b8792590e40f68f4bc8d2a6f2c..3d94b04b8b7029ac8f6ca8ac4cce925a8afc6913 100644
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index 68ca9ed31c6c5b76d4e319aee7a7267064219f02..2a706265bdde1be7e96047593074f01ff3116766 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -33,7 +33,7 @@ wandb_project:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./model-out
+output_dir: ./outputs/model-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml
index e692c7ac1e56f8ff7ddc33444fbf43451760e81a..7012d1f6131215eebad688762f8811c1183553d3 100644
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./lisa-out
+output_dir: ./outputs/lisa-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml
index 4529a912dc3a14fa319a5bc2c552219551715818..68d9ac01421e1c841b941b217889b914d95da8de 100644
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index a7793dce4cbe5fcfa314ad1595db7cc84adcc5b5..95bfae6920b4f9734d8bffb433ef6435097cb724 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml
index 93b3b2a60ad2068b8fcb516eaf1155ebed13da07..88029f92d5bcb2c02ede8b0fe8df2ae451803a1f 100644
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index 834dbfb33a65dcefc1e8298d74a35bf75a6eafb8..dda32170bd812028cf377e15c7a36323cafe0ca1 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 9fd19953c60190e71cc8326ec52405f26b6b9080..93247ce068abf39db2e1de2fb3d79c82f1ea3f37 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./relora-out
+output_dir: ./outputs/relora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml
index 8c9ba90bfe89bfbd9cc7f0db4173029ba5b2b802..a36fd740e417627859a26136ef4cb3cd94f8552e 100644
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8192
 sample_packing: true
diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml
index d60f8a303549913940a91ecbbbfc703abacaae0d..6b0ebaed86cdcc9079f64798d648aa7a37529ac2 100644
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml
index 8d8785bfd588590ec307a33ac16adc260fad5b44..9b74f6b4de687b9c0e859d1f05f1e169c15421ab 100644
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out/qlora-llama3-70b
+output_dir: ./outputs/out/qlora-llama3-70b

 adapter: qlora
 lora_model_dir:
diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml
index 9cedee8eec208d19286a286c875ec4e0bd7b8b9b..44120d9385db8da0d7d67c96ab689726e06ccd95 100644
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml
index 0a5223bcac7dd5cbe505522696e2b38aa3e81be1..f88f5138d916055fe3b4e47fd39f575d5c151771 100644
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 2048
 sample_packing: false
diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml
index cc0a44b2a404c0cd9df67fedacc830a02f50139b..e993e44a7862be45ffd1a173f17e07a7aa2a59b7 100644
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -23,7 +23,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index c909c63e2213b0916a449c554eb4b60943823107..a70937c4fd714b88edb5cee3883aab2f34be43b0 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8192
 sample_packing: true
diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml
index 31b0d527e2bb0379256d3153f0ea19e90b59534c..03c74bb59bbca7c6eb062c68b92ad9b704fff108 100644
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 eval_sample_packing: false

 adapter: lora
diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml
index ac9ac0dd982ff835144d3fa6f1cd5afb094341e3..0d5dc9edd7c9c76adb5aa05d67832e0f9abb6d47 100644
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 adapter: lora
 lora_model_dir:
diff --git a/examples/mistral/mistral-qlora-fsdp.yml b/examples/mistral/mistral-qlora-fsdp.yml
index 71ac1e701f907ab774fe573f0c712bde6bee34e1..e6b07c594b1e4f2ec3fecf962a9e0c32407f3eee 100644
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml
index 7727fd7485c83e68c4690a1f4b7209e1079a128b..2549ef018c6e065597e80030a0dc7f05d62ed51c 100644
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -16,7 +16,7 @@ datasets:
     type: chat_template.argilla
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./mistral-qlora-orpo-out
+output_dir: ./outputs/mistral-qlora-orpo-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
index ac80a2a7561f46537e4b9e8a18dc7a42d656a696..fe68b28172901ce8615174da984df916bdab2cee 100644
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml
index b6a07ae51cc3df2d90f82187f936a26426fa803e..c0959704027713d5dde9901e376538df17223c2e 100644
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 model_config:
   output_router_logits: true
diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml
index 5ee3da9d65981de3185559cdbbf6daa056c6dba8..13fbe92ab8d7fd4d96444835728c0928d95cfcbe 100644
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 ## You can optionally freeze the entire model and unfreeze a subset of parameters
 unfrozen_parameters:
diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml
index 9abb6f407a8c9c2186e6a22d72108439e6933aa5..9a1e86386c28963902c1bfb2eb405d4530ae3244 100644
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -21,7 +21,7 @@ model_config:
 datasets:
   - path: yahma/alpaca-cleaned
     type: alpaca
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 8000
 sample_packing: true
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 6fbbb96183d1a3382f663d2bf8c9ba7eb5e0d7eb..c7bdb155c0ec69afc0b7147e49d4ca30c141853e 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.1
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 45e31266f1a73057c2f4cc1aabe7b01749a8bc5e..530415de1703ab68ff64451a0c6caf109e4d46f1 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./mpt-alpaca-7b
+output_dir: ./outputs/mpt-alpaca-7b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index 0a404c79d85114359412622dbc642117a5fab7f7..a0473213c033efe5e858fe278177b7caa3310cda 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./openllama-out
+output_dir: ./outputs/openllama-out
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index b83b2db4e4aa03ffeb4b563fa82092cf72be960b..2b67849159904e990c881285256837e89bb4fa7d 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -31,7 +31,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index 3d6218b308a3133b35a1507a04360f8ce094a204..8d4dc05ca7594e6f561cf192f03f092662610b76 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -25,7 +25,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 gradient_accumulation_steps: 1
 micro_batch_size: 2
 num_epochs: 4
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index b21386f7077c10f78e5062a41f4e394a2ff85dac..0dabadc7a4e68788d060ad6ba1e174eef22a7f5f 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -12,7 +12,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index d2b5d661c9cf6d6a403883b86a54a6a7017234b2..7c181a3c15968cad2d0df2c42132f3a55b8d1b17 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -12,7 +12,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml
index 7a2d05d0189e7f2f0da4d51ec672318d287a37fc..27815550b4c8a4d37a07c47c1ac30102693a2d09 100644
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -12,7 +12,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./phi-sft-out
+output_dir: ./outputs/phi-sft-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml
index e44bba74512ae7be47acbe1fef3b47f3ad2e8af4..18e6beaafd2ec0405de2ae68847430063e816396 100644
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -26,7 +26,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./pythia-12b
+output_dir: ./outputs/pythia-12b
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 5
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index 7cb07fe2583dc0bcf16ab6fc4975c7b71e00f524..0aa650f67e807e1fe06f7fcc9d40cc409c073d5f 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -20,7 +20,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-alpaca-pythia
+output_dir: ./outputs/lora-alpaca-pythia
 gradient_accumulation_steps: 1
 micro_batch_size: 4
 num_epochs: 4
diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml
index da4d784e0a0925c8968337ce905d8352c3c2c91f..dd8dc1e4f4e9452874b1ae738247e7a45e2f9085 100644
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -13,7 +13,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 2048  # supports up to 8192
 sample_packing: false
diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml
index 501a866b2d872adc94c09b3c864fd31c3e5bb986..01c0c0ab864ac424630a811a87d5d62665e5c205 100644
--- a/examples/qwen/qlora.yml
+++ b/examples/qwen/qlora.yml
@@ -13,7 +13,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 2048  # supports up to 8192
 sample_packing: false
diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml
index c59b282d0add721555168290d75bfc4eb4315af4..452335e38f1bc81fc2e1e1de0a0ee19480f8be9d 100644
--- a/examples/qwen/qwen2-moe-lora.yaml
+++ b/examples/qwen/qwen2-moe-lora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 1024  # supports up to 32k
 sample_packing: false
diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml
index d6a835a0a30bd4d7d1bd70d12187def02e47ec43..bc11007c78777bf11fc28bfe733752470e4bf046 100644
--- a/examples/qwen/qwen2-moe-qlora.yaml
+++ b/examples/qwen/qwen2-moe-qlora.yaml
@@ -10,7 +10,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 1024  # supports up to 32k
 sample_packing: false
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index 5a42e2a9520110882a9952cd7a6bfe68185f79d7..ff395a863df28927fbbf4c86394cd9c8cc30ec3f 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -24,7 +24,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./redpajama-alpaca-3b
+output_dir: ./outputs/redpajama-alpaca-3b
 batch_size: 4
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml
index bdfe1bd854bfcfcca571259c574e7088500c4cfb..9fee099d474f09acf553ca85e0d9de54461ad996 100644
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -23,7 +23,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./lora-replit
+output_dir: ./outputs/lora-replit
 batch_size: 8
 micro_batch_size: 1
 num_epochs: 4
diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml
index f3fc16f867bcc11d707820778d3bb500d3a2aed2..777262a7ee43de7cb990d276ec032d8b47960620 100644
--- a/examples/stablelm-2/1.6b/fft.yml
+++ b/examples/stablelm-2/1.6b/fft.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.05
-output_dir: ./out
+output_dir: ./outputs/out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml
index c5051fab6e6e24134f01ee56b48b742c6e95995a..c65b9e4cd0617afb898bb69533ddc7c7778c4b0b 100644
--- a/examples/stablelm-2/1.6b/lora.yml
+++ b/examples/stablelm-2/1.6b/lora.yml
@@ -12,7 +12,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml
index 1efdfbc8e0905988936ef59c502c162ac8b2aae1..83fc0d89f76ba3c842f5011eee1d3d2c13d800ed 100644
--- a/examples/starcoder2/qlora.yml
+++ b/examples/starcoder2/qlora.yml
@@ -11,7 +11,7 @@ datasets:

 dataset_prepared_path:
 val_set_size: 0.2
-output_dir: ./qlora
+output_dir: ./outputs/qlora

 adapter: qlora
 lora_model_dir:
diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml
index fd7b02cacac16eaf2cabbac675adb38d38cceca7..c08be82d3b95e328b3dd03c5ad84011dc9547944 100644
--- a/examples/tiny-llama/lora-mps.yml
+++ b/examples/tiny-llama/lora-mps.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml
index 4a16f14b92d794d26a41be7e88ca1fe760c9c0a5..c5ff0437e8cc6dd892c3fa06a99f21a0f79abcf8 100644
--- a/examples/tiny-llama/lora.yml
+++ b/examples/tiny-llama/lora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./lora-out
+output_dir: ./outputs/lora-out

 sequence_len: 4096
 sample_packing: true
diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml
index 3b68a7f5477bfc96b3e79f86f5b9d4063b18f4d3..e501dcb8e590f7b156bf022b5ad2862ff33d3b5c 100644
--- a/examples/tiny-llama/pretrain.yml
+++ b/examples/tiny-llama/pretrain.yml
@@ -14,7 +14,7 @@ pretraining_dataset:
     type: pretrain
 dataset_prepared_path:
 val_set_size: 0.0
-output_dir: ./model-out
+output_dir: ./outputs/model-out

 sequence_len: 2048
 sample_packing: true
diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml
index 3ea313c838cf69e930dcbe82e268efce049ffda3..0d21aca9d5fe824891b1097d7dccdb8d63551c23 100644
--- a/examples/tiny-llama/qlora.yml
+++ b/examples/tiny-llama/qlora.yml
@@ -11,7 +11,7 @@ datasets:
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0.05
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 adapter: qlora
 lora_model_dir:
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index e3faa01bdb79dd97abd816d2c8f9fd6b8bc3675c..7e3f83cbd7b2318c15d1c5ec904190ff5be09933 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -40,7 +40,7 @@ wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out

 # QLoRA paper Table 9
 # - 16 for 7b & 13b
diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml
index dc8c37d18796a13d17bda1aa2974224d2a0af15a..7fe322d63dc9840c251f46136963902d556df0e3 100644
--- a/examples/yi-34B-chat/qlora.yml
+++ b/examples/yi-34B-chat/qlora.yml
@@ -33,7 +33,7 @@ eval_sample_packing: false
 eval_batch_size: 1

 # LoRA
-output_dir: ./qlora-out
+output_dir: ./outputs/qlora-out
 adapter: qlora
 lora_model_dir:
 lora_r: 32
diff --git a/outputs/.gitignore b/outputs/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..72e8ffc0db8aad71a934dd11e5968bd5109e54b4
--- /dev/null
+++ b/outputs/.gitignore
@@ -0,0 +1 @@
+*