firefly-qwen-7b-sft-qlora / train_args.json
lyy14011305's picture
Upload 13 files
d97772d verified
raw
history blame contribute delete
868 Bytes
{
"output_dir": "output/firefly-qwen-7b-sft-qlora",
"model_name_or_path": "Qwen/Qwen-7B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "qwen",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 0.0002,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}