wandb_project: llm-training-platform
wandb_name: II-Tulu-3B-SFT
datasets:
- path: allenai/tulu-3-sft-mixture
  split: train
  type: chat_template
  field_messages: messages
  message_field_role: role
  message_field_content: content
  roles:
    system:
    - system
    user:
    - user
    assistant:
    - assistant
chat_template: qwen_25
sequence_len: 2048
base_model: Qwen/Qwen2.5-3B
output_dir: checkpoints/1357e2cd-76bc-46d5-a394-949b712427c7
dataset_prepared_path: checkpoints/1357e2cd-76bc-46d5-a394-949b712427c7/dataset_prepared
flash_attention: true
train_on_inputs: false
pad_to_sequence_len: true
eval_sample_packing: false
push_to_hub: true
bf16: auto
gradient_checkpointing: true
logging_steps: 10
hub_model_id: phunguyen01/II-Tulu-3B-SFT
learning_rate: 5.0e-06
micro_batch_size: 8
num_epochs: 2
seed: 42
gradient_accumulation_steps: 2
sample_packing: true
val_set_size: 0