DreamGenX winglian committed on
Commit
5787e1a
·
unverified ·
1 Parent(s): 8608d80

Fix and document test_datasets (#1228)

Browse files

* Make sure test_datasets are used and handle val_set_size accordingly.

* Add test_datasets docs.

* Apply suggestions from code review

---------

Co-authored-by: Wing Lian <[email protected]>

README.md CHANGED
@@ -607,6 +607,17 @@ datasets:
607
  # For `completion` datasets only, uses the provided field instead of `text` column
608
  field:
609
 
 
 
 
 
 
 
 
 
 
 
 
610
  # use RL training: dpo, ipo, kto_pair
611
  rl:
612
 
 
607
  # For `completion` datasets only, uses the provided field instead of `text` column
608
  field:
609
 
610
+ # A list of one or more datasets to eval the model with.
611
+ # You can use either test_datasets, or val_set_size, but not both.
612
+ test_datasets:
613
+ - path: /workspace/data/eval.jsonl
614
+ ds_type: json
615
+ # You need to specify a split. For "json" datasets the default split is called "train".
616
+ split: train
617
+ type: completion
618
+ data_files:
619
+ - /workspace/data/eval.jsonl
620
+
621
  # use RL training: dpo, ipo, kto_pair
622
  rl:
623
 
src/axolotl/core/trainer_builder.py CHANGED
@@ -735,7 +735,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
735
  elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False:
736
  training_arguments_kwargs["dataloader_drop_last"] = True
737
 
738
- if self.cfg.val_set_size == 0:
739
  # no eval set, so don't eval
740
  training_arguments_kwargs["evaluation_strategy"] = "no"
741
  elif self.cfg.eval_steps:
@@ -822,6 +822,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
822
  self.cfg.load_best_model_at_end is not False
823
  or self.cfg.early_stopping_patience
824
  )
 
825
  and self.cfg.val_set_size > 0
826
  and self.cfg.save_steps
827
  and self.cfg.eval_steps
 
735
  elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False:
736
  training_arguments_kwargs["dataloader_drop_last"] = True
737
 
738
+ if not self.cfg.test_datasets and self.cfg.val_set_size == 0:
739
  # no eval set, so don't eval
740
  training_arguments_kwargs["evaluation_strategy"] = "no"
741
  elif self.cfg.eval_steps:
 
822
  self.cfg.load_best_model_at_end is not False
823
  or self.cfg.early_stopping_patience
824
  )
825
+ and not self.cfg.test_datasets
826
  and self.cfg.val_set_size > 0
827
  and self.cfg.save_steps
828
  and self.cfg.eval_steps
src/axolotl/utils/data.py CHANGED
@@ -440,7 +440,7 @@ def load_prepare_datasets(
440
  split="train",
441
  ) -> Tuple[Dataset, Dataset, List[Prompter]]:
442
  dataset, prompters = load_tokenized_prepared_datasets(
443
- tokenizer, cfg, default_dataset_prepared_path
444
  )
445
 
446
  if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
 
440
  split="train",
441
  ) -> Tuple[Dataset, Dataset, List[Prompter]]:
442
  dataset, prompters = load_tokenized_prepared_datasets(
443
+ tokenizer, cfg, default_dataset_prepared_path, split=split
444
  )
445
 
446
  if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None: