{
    "model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
    "pooling_mode": "weighted_mean",
    "dataset_name": "E5",
    "dataset_file_path": "cache/echo-data",
    "remove_unused_columns": false,
    "learning_rate": 0.0002,
    "num_train_epochs": 3,
    "warmup_steps": 300,
    "per_device_train_batch_size": 64,
    "per_device_eval_batch_size": 64,
    "gradient_accumulation_steps": 1,
    "do_train": true,
    "disable_tqdm": false,
    "max_seq_length": 512,
    "overwrite_output_dir": true,
    "output_dir": "output/meta-llama/Meta-Llama-3-8B-Instruct",
    "use_adapter": true,
    "percent_prune": [25],
    "autoprune": "small+large",
    "logging_steps": 50,
    "save_steps": 200,
    "save_total_limit": 1,
    "save_only_model": true,
    "stop_after_n_steps": 1000,
    "lora_r": 16,
    "gradient_checkpointing": true,
    "torch_dtype": "bfloat16",
    "attn_implementation": "flash_attention_2",
    "seed": 42
}
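
A minimal sketch of how a config like this might be consumed, assuming the training script separates standard Hugging Face `TrainingArguments` fields from project-specific options (field names such as `pooling_mode`, `use_adapter`, `percent_prune`, and `stop_after_n_steps` are custom to this repo, and the config path below is illustrative only):

```python
import json
from dataclasses import fields
from transformers import TrainingArguments

# Path is an assumption for illustration; point it at the JSON file above.
with open("configs/meta-llama-3-8b-instruct.json") as f:
    config = json.load(f)

# Split keys that TrainingArguments understands from project-specific ones
# (e.g. model_name_or_path, pooling_mode, use_adapter, lora_r, autoprune).
hf_keys = {field.name for field in fields(TrainingArguments)}
hf_args = {k: v for k, v in config.items() if k in hf_keys}
custom_args = {k: v for k, v in config.items() if k not in hf_keys}

training_args = TrainingArguments(**hf_args)
print(training_args.learning_rate)
print(custom_args)
```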