add back packing efficiency estimate so epochs and multi-gpu work properly (#1697)
Browse files
src/axolotl/core/trainer_builder.py
CHANGED
@@ -387,6 +387,7 @@ class AxolotlTrainer(Trainer):
|
|
387 |
return MultipackBatchSampler(
|
388 |
RandomSampler(self.train_dataset),
|
389 |
lengths=get_dataset_lengths(self.train_dataset),
|
|
|
390 |
batch_max_len=batch_max_len,
|
391 |
batch_size=batch_size,
|
392 |
group_size=self.args.sample_packing_group_size,
|
@@ -412,6 +413,7 @@ class AxolotlTrainer(Trainer):
|
|
412 |
return MultipackBatchSampler(
|
413 |
SequentialSampler(eval_dataset),
|
414 |
lengths=get_dataset_lengths(self.eval_dataset),
|
|
|
415 |
batch_max_len=batch_max_len,
|
416 |
batch_size=batch_size,
|
417 |
group_size=self.args.sample_packing_group_size,
|
|
|
387 |
return MultipackBatchSampler(
|
388 |
RandomSampler(self.train_dataset),
|
389 |
lengths=get_dataset_lengths(self.train_dataset),
|
390 |
+
packing_efficiency_estimate=self.args.sample_packing_efficiency,
|
391 |
batch_max_len=batch_max_len,
|
392 |
batch_size=batch_size,
|
393 |
group_size=self.args.sample_packing_group_size,
|
|
|
413 |
return MultipackBatchSampler(
|
414 |
SequentialSampler(eval_dataset),
|
415 |
lengths=get_dataset_lengths(self.eval_dataset),
|
416 |
+
packing_efficiency_estimate=self.args.sample_packing_efficiency,
|
417 |
batch_max_len=batch_max_len,
|
418 |
batch_size=batch_size,
|
419 |
group_size=self.args.sample_packing_group_size,
|