train_old.py deepspeed /workspace/llavaguard/configs/zero3.json model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov version qwen_1_5 data_path /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/train_oversampled.json data_path_eval /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/eval.json image_folder /common-repos mm_tunable_parts=mm_language_model mm_vision_tower_lr=2e-6 vision_tower google/siglip-so400m-patch14-384 mm_projector_type mlp2x_gelu mm_vision_select_layer -2 mm_use_im_start_end False mm_use_im_patch_token False group_by_modality_length True image_aspect_ratio anyres_max_9 image_grid_pinpoints (1x1),...,(6x6) mm_patch_merge_type spatial_unpad bf16 True run_name LlavaGuard-v1.2-mini-ov-lmms-lab_llava-onevision-qwen2-0.5b-ov-LlavaGuard-DS-v24 output_dir /common-repos/LlavaGuard/models/LlavaGuard-v1.2-mini-ov/v24 num_train_epochs 3 per_device_train_batch_size 1 per_device_eval_batch_size 2 gradient_accumulation_steps 25 evaluation_strategy no eval_steps 1 save_strategy epoch save_steps 1 save_total_limit 1 learning_rate 1e-5 weight_decay 0. warmup_ratio 0.03 lr_scheduler_type cosine logging_steps 1 tf32 True model_max_length 32768 gradient_checkpointing True dataloader_num_workers 4 lazy_preprocess True report_to wandb torch_compile True torch_compile_backend inductor dataloader_drop_last True Eval date: 22/11/2024 07:49:11