Training script: train_old.py

deepspeed: /workspace/llavaguard/configs/zero3.json
model_name_or_path: lmms-lab/llava-onevision-qwen2-0.5b-ov
version: qwen_1_5
data_path: /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/train_oversampled.json
data_path_eval: /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/eval.json
image_folder: /common-repos
mm_tunable_parts: mm_language_model
mm_vision_tower_lr: 2e-6
vision_tower: google/siglip-so400m-patch14-384
mm_projector_type: mlp2x_gelu
mm_vision_select_layer: -2
mm_use_im_start_end: False
mm_use_im_patch_token: False
group_by_modality_length: True
image_aspect_ratio: anyres_max_9
image_grid_pinpoints: (1x1),...,(6x6)
mm_patch_merge_type: spatial_unpad
bf16: True
run_name: LlavaGuard-v1.2-mini-ov-lmms-lab_llava-onevision-qwen2-0.5b-ov-LlavaGuard-DS-v24
output_dir: /common-repos/LlavaGuard/models/LlavaGuard-v1.2-mini-ov/v24
num_train_epochs: 3
per_device_train_batch_size: 1
per_device_eval_batch_size: 2
gradient_accumulation_steps: 25
evaluation_strategy: no
eval_steps: 1
save_strategy: epoch
save_steps: 1
save_total_limit: 1
learning_rate: 1e-5
weight_decay: 0.
warmup_ratio: 0.03
lr_scheduler_type: cosine
logging_steps: 1
tf32: True
model_max_length: 32768
gradient_checkpointing: True
dataloader_num_workers: 4
lazy_preprocess: True
report_to: wandb
torch_compile: True
torch_compile_backend: inductor
dataloader_drop_last: True

Eval date: 22/11/2024 07:49:11
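
The listing above appears to be a dump of the training hyperparameters rather than the literal launch command, so the exact invocation is not recorded in this file. A minimal sketch of how these arguments would be passed to a DeepSpeed launch is given below; treating train_old.py as the training entry point and mapping each key to a --key value flag are assumptions, not something stated in the file.

# Sketch of a DeepSpeed launch reconstructed from the arguments above (an assumption,
# not taken verbatim from this file). By default, deepspeed uses all local GPUs.
deepspeed train_old.py \
  --deepspeed /workspace/llavaguard/configs/zero3.json \
  --model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov \
  --version qwen_1_5 \
  --data_path /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/train_oversampled.json \
  --data_path_eval /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/eval.json \
  --image_folder /common-repos \
  --mm_tunable_parts mm_language_model \
  --mm_vision_tower_lr 2e-6 \
  --vision_tower google/siglip-so400m-patch14-384 \
  --mm_projector_type mlp2x_gelu \
  --mm_vision_select_layer -2 \
  --mm_use_im_start_end False \
  --mm_use_im_patch_token False \
  --group_by_modality_length True \
  --image_aspect_ratio anyres_max_9 \
  --image_grid_pinpoints "(1x1),...,(6x6)" \
  --mm_patch_merge_type spatial_unpad \
  --bf16 True \
  --run_name LlavaGuard-v1.2-mini-ov-lmms-lab_llava-onevision-qwen2-0.5b-ov-LlavaGuard-DS-v24 \
  --output_dir /common-repos/LlavaGuard/models/LlavaGuard-v1.2-mini-ov/v24 \
  --num_train_epochs 3 \
  --per_device_train_batch_size 1 \
  --per_device_eval_batch_size 2 \
  --gradient_accumulation_steps 25 \
  --evaluation_strategy no \
  --eval_steps 1 \
  --save_strategy epoch \
  --save_steps 1 \
  --save_total_limit 1 \
  --learning_rate 1e-5 \
  --weight_decay 0. \
  --warmup_ratio 0.03 \
  --lr_scheduler_type cosine \
  --logging_steps 1 \
  --tf32 True \
  --model_max_length 32768 \
  --gradient_checkpointing True \
  --dataloader_num_workers 4 \
  --lazy_preprocess True \
  --report_to wandb \
  --torch_compile True \
  --torch_compile_backend inductor \
  --dataloader_drop_last True

Note that with per_device_train_batch_size 1 and gradient_accumulation_steps 25, the effective batch size is 25 per GPU (25 times the number of GPUs in total), and because evaluation_strategy is set to no, the eval_steps value has no effect during training.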