tangledgroup
/

tangled-llama-33m-32k-instruct-v0.1

Text Generation

Inference Endpoints

Model card · Files and versions · Community

mtasic85 committed on Oct 1, 2024

Commit

2607374

·

1 Parent(s): 171272c

model

Files changed (1) hide show

scripts/model.yaml +2 -1

scripts/model.yaml CHANGED Viewed

@@ -61,7 +61,8 @@ train:
   global_batch_size: 512
   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size: 16
   # micro_batch_size: 14
   # Number of iterations with learning rate warmup active (type: int, default: 2000)

   global_batch_size: 512
   # Number of samples per data-parallel rank (type: int, default: 4)
+  micro_batch_size: 1
+  # micro_batch_size: 16
   # micro_batch_size: 14
   # Number of iterations with learning rate warmup active (type: int, default: 2000)