ich committed on
Commit
590d603
·
unverified ·
1 Parent(s): 409ca0f

Fix bug when using pretokenized datasets (#652)

Browse files

* fix pretokenized datasets readme

* check if dataset type is not set to handle pretokenized datasets

Files changed (2) hide show
  1. README.md +1 -1
  2. src/axolotl/utils/config.py +2 -0
README.md CHANGED
@@ -317,7 +317,7 @@ Using file:
317
  #### How to use your custom pretokenized dataset
318
 
319
  - Do not pass a `type:`
320
- - Dataset must contain `input_ids`, `attention_mask`, `labels` in columns
321
 
322
 
323
  ### Config
 
317
  #### How to use your custom pretokenized dataset
318
 
319
  - Do not pass a `type:`
320
+ - Columns in Dataset must be exactly `input_ids`, `attention_mask`, `labels`
321
 
322
 
323
  ### Config
src/axolotl/utils/config.py CHANGED
@@ -293,6 +293,8 @@ def validate_config(cfg):
293
 
294
  if cfg.datasets:
295
  for idx, ds_cfg in enumerate(cfg.datasets):
 
 
296
  if ds_cfg.type == "sharegpt:chat":
297
  LOG.warning(
298
  PendingDeprecationWarning(
 
293
 
294
  if cfg.datasets:
295
  for idx, ds_cfg in enumerate(cfg.datasets):
296
+ if not ds_cfg.type:
297
+ continue
298
  if ds_cfg.type == "sharegpt:chat":
299
  LOG.warning(
300
  PendingDeprecationWarning(