LukasHug's picture
Upload folder using huggingface_hub
a8ed15e verified
raw
history blame
1.4 kB
train_old.py
deepspeed /workspace/llavaguard/configs/zero3.json
model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov
version qwen_1_5
data_path /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/train_oversampled.json
data_path_eval /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/eval.json
image_folder /common-repos
mm_tunable_parts mm_language_model
mm_vision_tower_lr 2e-6
vision_tower google/siglip-so400m-patch14-384
mm_projector_type mlp2x_gelu
mm_vision_select_layer -2
mm_use_im_start_end False
mm_use_im_patch_token False
group_by_modality_length True
image_aspect_ratio anyres_max_9
image_grid_pinpoints (1x1),...,(6x6)
mm_patch_merge_type spatial_unpad
bf16 True
run_name LlavaGuard-v1.2-mini-ov-lmms-lab_llava-onevision-qwen2-0.5b-ov-LlavaGuard-DS-v24
output_dir /common-repos/LlavaGuard/models/LlavaGuard-v1.2-mini-ov/v24
num_train_epochs 3
per_device_train_batch_size 1
per_device_eval_batch_size 2
gradient_accumulation_steps 25
evaluation_strategy no
eval_steps 1
save_strategy epoch
save_steps 1
save_total_limit 1
learning_rate 1e-5
weight_decay 0.
warmup_ratio 0.03
lr_scheduler_type cosine
logging_steps 1
tf32 True
model_max_length 32768
gradient_checkpointing True
dataloader_num_workers 4
lazy_preprocess True
report_to wandb
torch_compile True
torch_compile_backend inductor
dataloader_drop_last True
Eval date: 22/11/2024 07:49:11