Merge pull request #104 from OpenAccess-AI-Collective/training-fixes-20230529
Browse files
- .github/workflows/base.yml +1 -1
- docker/Dockerfile-base +1 -1
- scripts/finetune.py +9 -9
.github/workflows/base.yml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
name: ci-cd
|
2 |
|
3 |
on:
|
4 |
push:
|
|
|
1 |
+
name: ci-cd-base
|
2 |
|
3 |
on:
|
4 |
push:
|
docker/Dockerfile-base
CHANGED
@@ -90,7 +90,7 @@ COPY --from=flash-attn-builder /workspace/flash-attention/csrc/rotary/dist/rotar
|
|
90 |
COPY --from=flash-attn-builder /workspace/flash-attention/csrc/layer_norm/dist/dropout_layer_norm-*.whl wheels
|
91 |
|
92 |
RUN pip3 install wheels/deepspeed-*.whl wheels/flash_attn-*.whl wheels/fused_dense_lib-*.whl wheels/xentropy_cuda_lib-*.whl wheels/rotary_emb-*.whl wheels/dropout_layer_norm-*.whl
|
93 |
-
RUN cd /workspace/builds/bitsandbytes && python3 setup.py install
|
94 |
RUN git lfs install --skip-repo
|
95 |
RUN pip3 install "peft @ git+https://github.com/huggingface/peft.git@main" \
|
96 |
"accelerate @ git+https://github.com/huggingface/accelerate.git@main" \
|
|
|
90 |
COPY --from=flash-attn-builder /workspace/flash-attention/csrc/layer_norm/dist/dropout_layer_norm-*.whl wheels
|
91 |
|
92 |
RUN pip3 install wheels/deepspeed-*.whl wheels/flash_attn-*.whl wheels/fused_dense_lib-*.whl wheels/xentropy_cuda_lib-*.whl wheels/rotary_emb-*.whl wheels/dropout_layer_norm-*.whl
|
93 |
+
RUN cd /workspace/builds/bitsandbytes && cp bitsandbytes/libbitsandbytes_cuda.so bitsandbytes/libbitsandbytes_cuda${CUDA_VERSION_BNB}.so && python3 setup.py install
|
94 |
RUN git lfs install --skip-repo
|
95 |
RUN pip3 install "peft @ git+https://github.com/huggingface/peft.git@main" \
|
96 |
"accelerate @ git+https://github.com/huggingface/accelerate.git@main" \
|
scripts/finetune.py
CHANGED
@@ -178,6 +178,15 @@ def train(
|
|
178 |
tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH
|
179 |
)
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
if prepare_ds_only:
|
182 |
logging.info("Finished preparing dataset. Exiting...")
|
183 |
return
|
@@ -213,15 +222,6 @@ def train(
|
|
213 |
model.save_pretrained(cfg.output_dir)
|
214 |
return
|
215 |
|
216 |
-
if cfg.debug:
|
217 |
-
logging.info("check_dataset_labels...")
|
218 |
-
check_dataset_labels(
|
219 |
-
train_dataset.select(
|
220 |
-
[random.randrange(0, len(train_dataset) - 1) for i in range(5)]
|
221 |
-
),
|
222 |
-
tokenizer,
|
223 |
-
)
|
224 |
-
|
225 |
trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer)
|
226 |
|
227 |
model.config.use_cache = False
|
|
|
178 |
tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH
|
179 |
)
|
180 |
|
181 |
+
if cfg.debug or "debug" in kwargs:
|
182 |
+
logging.info("check_dataset_labels...")
|
183 |
+
check_dataset_labels(
|
184 |
+
train_dataset.select(
|
185 |
+
[random.randrange(0, len(train_dataset) - 1) for i in range(5)]
|
186 |
+
),
|
187 |
+
tokenizer,
|
188 |
+
)
|
189 |
+
|
190 |
if prepare_ds_only:
|
191 |
logging.info("Finished preparing dataset. Exiting...")
|
192 |
return
|
|
|
222 |
model.save_pretrained(cfg.output_dir)
|
223 |
return
|
224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
trainer = setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer)
|
226 |
|
227 |
model.config.use_cache = False
|