# II-Tulu-3B-SFT / training_config.yaml
# Last commit: phunguyen01, 7d02a54 (verified) - "Training in progress, epoch 1"
# File size: 888 bytes
---
# Supervised fine-tuning configuration for the II-Tulu-3B-SFT run.
# NOTE(review): key names match the Axolotl config schema - confirm that
# Axolotl is the consumer before relying on its defaults.

# Experiment tracking.
wandb_project: llm-training-platform
wandb_name: II-Tulu-3B-SFT

# Training data: a single dataset entry. Everything from `path` down to
# `roles` is a field of that entry, not a top-level setting.
datasets:
  - path: allenai/tulu-3-sft-mixture
    split: train
    type: chat_template
    # Column holding the conversation, and the per-message keys inside it.
    field_messages: messages
    message_field_role: role
    message_field_content: content
    # Presumably maps each template role to the source labels that should
    # be treated as that role - verify against the consumer's docs.
    roles:
      system:
        - system
      user:
        - user
      assistant:
        - assistant

# NOTE(review): kept at top level rather than inside the dataset entry;
# confirm this matches the intended nesting.
chat_template: qwen_25
sequence_len: 2048

# Base model and output locations.
base_model: Qwen/Qwen2.5-3B
output_dir: checkpoints/1357e2cd-76bc-46d5-a394-949b712427c7
dataset_prepared_path: checkpoints/1357e2cd-76bc-46d5-a394-949b712427c7/dataset_prepared

# Attention, precision and memory settings.
flash_attention: true
bf16: auto
gradient_checkpointing: true

# Loss masking, padding and packing.
train_on_inputs: false
pad_to_sequence_len: true
sample_packing: true
eval_sample_packing: false

# Hub upload.
push_to_hub: true
hub_model_id: phunguyen01/II-Tulu-3B-SFT

# Optimisation schedule.
learning_rate: 5.0e-06
micro_batch_size: 8
gradient_accumulation_steps: 2
num_epochs: 2
seed: 42
logging_steps: 10

# No rows are held out for validation.
val_set_size: 0