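# Fine-tuning configuration: QLoRA-style training (4-bit NF4 quantization +
# LoRA adapters) of Meta-Llama-3-8B-Instruct on the SusGen 30k instruction set.
# data: prompt template and dataset paths; with val: null, val_split_ratio
# presumably carves 0.5% of the training file out as a validation split.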
data:
  prompt: llama3_formal
  train: ../data/susgen/FINAL/PER_3500/FINAL_PER3500_30k.json
  val: null
  val_split_ratio: 0.005
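# Runtime settings. instruct_mask presumably masks instruction/prompt tokens
# out of the loss so only response tokens are fit; local_rank is normally
# overridden by the distributed launcher.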
device: cuda
instruct_mask: true
local_rank: 0
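# Model loading and PEFT settings. quantization (below) presumably selects
# which of int4_config / int8_config is applied when the base model is loaded.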
model:
  acceleration: null
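  # 4-bit loading options; field names match bitsandbytes' BitsAndBytesConfig:
  # NF4 quantization with double quantization, computing in bfloat16.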
  int4_config:
    bnb_4bit_compute_dtype: bfloat16
    bnb_4bit_quant_type: nf4
    bnb_4bit_use_double_quant: true
    load_in_4bit: true
    load_in_8bit: false
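  # 8-bit alternative, presumably used when quantization is set to int8.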
  int8_config:
    load_in_4bit: false
    load_in_8bit: true
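  # LoRA adapter settings (PEFT LoraConfig fields). r=16 with lora_alpha=32
  # gives the usual alpha/r = 2 scaling; adapters are attached to every
  # attention and MLP projection, plus lm_head (less common, but it makes the
  # output head trainable too).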
  lora:
    bias: none
    inference_mode: false
    lora_alpha: 32
    lora_dropout: 0.1
    r: 16
    target_modules:
      - q_proj
      - k_proj
      - v_proj
      - o_proj
      - gate_proj
      - up_proj
      - down_proj
      - lm_head
    task_type: CAUSAL_LM
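  # lora_path: false presumably means "train a fresh adapter" rather than
  # resuming from a saved one; quantization: int4 activates int4_config above.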
  lora_path: false
  model_path: ../ckpts/Meta-Llama-3-8B-Instruct
  quantization: int4
  seed: 2024
  show_config: false
  use_lora: true
  window: null
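# Run name and output root; checkpoints presumably land under output_dir/name.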
name: 30k-Llama3-8B
output_dir: ../results/
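# Tokenizer setup. Note the tokenizer is loaded from the base Meta-Llama-3-8B
# checkpoint while the model weights come from the Instruct checkpoint; the
# two ship the same tokenizer, so this should be equivalent. Left padding is
# the usual choice for decoder-only batched generation.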
tokenizer:
  add_bos_token: true
  add_eos_token: false
  add_prefix_space: false
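  # Arguments presumably forwarded to the tokenizer call when encoding
  # examples: truncate to 2048 tokens (matching model_max_length) and return
  # PyTorch tensors.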
  encode:
    max_length: 2048
    return_tensors: pt
    truncation: true
  model_max_length: 2048
  padding_side: left
  pretrained_model_name_or_path: ../ckpts/Meta-Llama-3-8B
  truncation_side: right
  use_fast: true
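# NewTrainer is presumably a project-specific Trainer subclass; the training
# block maps onto Hugging Face TrainingArguments fields.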
trainer: NewTrainer
training:
  bf16: true
  deepspeed: ./configs/ds_configs/ds_config_stage_2.json
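  # ZeRO stage 2, going by the DeepSpeed config filename. Effective batch per
  # GPU = per_device_train_batch_size (16) x gradient_accumulation_steps (16)
  # = 256 sequences per optimizer step.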
  gradient_accumulation_steps: 16
  learning_rate: 1.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
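  # Cosine decay after the 100 warmup_steps below, i.e. over the remaining
  # ~200 of the 301 max_steps.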
  max_steps: 301
  optim: paged_adamw_32bit
  per_device_train_batch_size: 16
  remove_unused_columns: false
  report_to: wandb
  resume_from_checkpoint: null
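  # Checkpoint every 20 steps: roughly 15 checkpoints over the 301-step run.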
  save_steps: 20
  save_strategy: steps
  warmup_steps: 100
  weight_decay: 0.01