{
  "output_dir": "output/firefly-qwen-7b-sft-qlora",
  "model_name_or_path": "Qwen/Qwen-7B-Chat",
  "train_file": "./data/dummy_data.jsonl",
  "template_name": "qwen",
  "num_train_epochs": 1,
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 16,
  "learning_rate": 0.0002,
  "max_seq_length": 1024,
  "logging_steps": 100,
  "save_steps": 100,
  "save_total_limit": 1,
  "lr_scheduler_type": "constant_with_warmup",
  "warmup_steps": 100,
  "lora_rank": 64,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "gradient_checkpointing": true,
  "disable_tqdm": false,
  "optim": "paged_adamw_32bit",
  "seed": 42,
  "fp16": true,
  "report_to": "tensorboard",
  "dataloader_num_workers": 0,
  "save_strategy": "steps",
  "weight_decay": 0,
  "max_grad_norm": 0.3,
  "remove_unused_columns": false
}