wrap prepared_ds_path in str() to avoid TypeError in fsspec package (#1548)
Browse files
* wrap prepared_ds_path in str() to avoid TypeError in fsspec package
`fsspec` calls `if "::" in path` on `prepared_ds_path`, which raises a `TypeError` if it is a `PosixPath` object rather than a `str`.
* update test too
---------
Co-authored-by: Wing Lian <[email protected]>
- src/axolotl/utils/data/sft.py +1 -1
- tests/test_datasets.py +1 -1
src/axolotl/utils/data/sft.py
CHANGED
@@ -421,7 +421,7 @@ def load_tokenized_prepared_datasets(
|
|
421 |
|
422 |
if cfg.local_rank == 0:
|
423 |
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
|
424 |
-
dataset.save_to_disk(prepared_ds_path)
|
425 |
if cfg.push_dataset_to_hub:
|
426 |
LOG.info(
|
427 |
f"Saving merged prepared dataset with push_to_hub... {cfg.push_dataset_to_hub}/{ds_hash}"
|
|
|
421 |
|
422 |
if cfg.local_rank == 0:
|
423 |
LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}")
|
424 |
+
dataset.save_to_disk(str(prepared_ds_path))
|
425 |
if cfg.push_dataset_to_hub:
|
426 |
LOG.info(
|
427 |
f"Saving merged prepared dataset with push_to_hub... {cfg.push_dataset_to_hub}/{ds_hash}"
|
tests/test_datasets.py
CHANGED
@@ -110,7 +110,7 @@ class TestDatasetPreparation(unittest.TestCase):
|
|
110 |
"""Usual use case. Verify datasets saved via `save_to_disk` can be loaded."""
|
111 |
with tempfile.TemporaryDirectory() as tmp_dir:
|
112 |
tmp_ds_name = Path(tmp_dir) / "tmp_dataset"
|
113 |
-
self.dataset.save_to_disk(tmp_ds_name)
|
114 |
|
115 |
prepared_path = Path(tmp_dir) / "prepared"
|
116 |
cfg = DictDefault(
|
|
|
110 |
"""Usual use case. Verify datasets saved via `save_to_disk` can be loaded."""
|
111 |
with tempfile.TemporaryDirectory() as tmp_dir:
|
112 |
tmp_ds_name = Path(tmp_dir) / "tmp_dataset"
|
113 |
+
self.dataset.save_to_disk(str(tmp_ds_name))
|
114 |
|
115 |
prepared_path = Path(tmp_dir) / "prepared"
|
116 |
cfg = DictDefault(
|