|
train_old.py |
|
deepspeed /workspace/llavaguard/configs/zero3.json |
|
model_name_or_path lmms-lab/llava-onevision-qwen2-0.5b-ov |
|
version qwen_1_5 |
|
data_path /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/train_oversampled.json |
|
data_path_eval /common-repos/LlavaGuard/data/LlavaGuard-DS/v24/eval.json |
|
image_folder /common-repos |
|
mm_tunable_parts mm_language_model |
|
mm_vision_tower_lr 2e-6 |
|
vision_tower google/siglip-so400m-patch14-384 |
|
mm_projector_type mlp2x_gelu |
|
mm_vision_select_layer -2 |
|
mm_use_im_start_end False |
|
mm_use_im_patch_token False |
|
group_by_modality_length True |
|
image_aspect_ratio anyres_max_9 |
|
image_grid_pinpoints (1x1),...,(6x6) |
|
mm_patch_merge_type spatial_unpad |
|
bf16 True |
|
run_name LlavaGuard-v1.2-mini-ov-lmms-lab_llava-onevision-qwen2-0.5b-ov-LlavaGuard-DS-v24 |
|
output_dir /common-repos/LlavaGuard/models/LlavaGuard-v1.2-mini-ov/v24 |
|
num_train_epochs 3 |
|
per_device_train_batch_size 1 |
|
per_device_eval_batch_size 2 |
|
gradient_accumulation_steps 25 |
|
evaluation_strategy no |
|
eval_steps 1 |
|
save_strategy epoch |
|
save_steps 1 |
|
save_total_limit 1 |
|
learning_rate 1e-5 |
|
weight_decay 0. |
|
warmup_ratio 0.03 |
|
lr_scheduler_type cosine |
|
logging_steps 1 |
|
tf32 True |
|
model_max_length 32768 |
|
gradient_checkpointing True |
|
dataloader_num_workers 4 |
|
lazy_preprocess True |
|
report_to wandb |
|
torch_compile True |
|
torch_compile_backend inductor |
|
dataloader_drop_last True |
|
Eval date: 22/11/2024 07:49:11 |