---
# litgpt full fine-tuning configuration: Meta-Llama-3-8B-Instruct on the
# AlphaFlow v10 instruction dataset (4k context).
# NOTE(review): the original file had a trailing " | " artifact on every line
# and flattened indentation (likely a table/paste mangling); structure restored
# per the litgpt finetune config schema, values unchanged.

# Pretrained checkpoint to start from (relative to the litgpt checkpoints root).
checkpoint_dir: checkpoints/meta-llama/Meta-Llama-3-8B-Instruct

# Where training logs and fine-tuned weights are written.
out_dir: /apdcephfs/share_300000800/user/wenlinyao/research2/AlphaCode/litgpt_trained_model/llama-3-8b-instruct_4k_alphaflow_v10

# bf16 "true" precision (no fp32 master weights).
precision: bf16-true

# Number of GPUs to train on.
devices: 8

# Do not resume from a previous run in out_dir.
resume: false

# Dataset: a single JSON file split into train/val by fraction.
data:
  class_path: litgpt.data.JSON
  init_args:
    json_path: /apdcephfs/share_300000800/user/wenlinyao/research2/AlphaCode/litgpt_data/alphaflow_training_data_v10.json
    # Train on prompt tokens as well (prompt loss not masked out).
    mask_prompt: false
    # 0.5% of samples held out for validation.
    val_split_fraction: 0.005
    prompt_style: llama3
    # Label value excluded from the loss.
    ignore_index: -100
    # Seed for the train/val split shuffle.
    seed: 42
    num_workers: 4

train:
  # Checkpoint every 600 optimizer steps.
  save_interval: 600
  log_interval: 1
  # Effective batch size; gradient accumulation = global / (micro * devices).
  global_batch_size: 128
  micro_batch_size: 1
  lr_warmup_steps: 50
  epochs: 4
  # Sequences truncated/packed to 4096 tokens.
  max_seq_length: 4096
  # Floor of the cosine LR decay schedule.
  min_lr: 2.0e-06

eval:
  # Validate every 200 steps.
  interval: 200
  # Generation length for the sample prompt during eval.
  max_new_tokens: 2048
  # Number of validation batches per eval pass.
  max_iters: 100
  # Skip validation before training starts.
  initial_validation: false

optimizer:
  class_path: torch.optim.AdamW
  init_args:
    # Peak learning rate (after warmup).
    lr: 2.0e-05
    weight_decay: 0.02
    betas:
      - 0.9
      - 0.95

# Log metrics to CSV files in out_dir.
logger_name: csv

# Global training seed (independent of the data-split seed above).
seed: 11