multitensor committed on
Commit
a65f868
·
verified ·
1 Parent(s): 1963ed3

Upload finetune_all_multinode_stage2.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. finetune_all_multinode_stage2.sh +93 -0
finetune_all_multinode_stage2.sh ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# Stage-2 fine-tuning of a multimodal model, launched across several nodes
# with torchrun and the DeepSpeed config ./scripts/zero2.json.
#
# Required environment variables (must be set by the caller):
#   ARNOLD_WORKER_NUM    total number of workers (passed as --nnodes)
#   ARNOLD_ID            id of the current worker (passed as --node_rank)
#   ARNOLD_WORKER_GPU    GPUs per worker (passed as --nproc_per_node)
#   METIS_WORKER_0_HOST  master address (passed as --master_addr)
#   METIS_WORKER_0_PORT  comma-separated port list; the first entry is used
#                        as --master_port
#
# The script's exit status is the exit status of torchrun.

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print a non-fatal warning to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Runtime settings for OpenMP, NCCL and torch.distributed.
export OMP_NUM_THREADS=8
export NCCL_IB_DISABLE=0
export NCCL_IB_GID_INDEX=3
export NCCL_SOCKET_IFNAME=eth0
export NCCL_DEBUG=INFO
export NCCL_IB_SL=1
export CUDA_DEVICE_MAX_CONNECTIONS=1
export NCCL_BLOCKING_WAIT=1
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_TIMEOUT=500
export TORCH_DISTRIBUTED_DEBUG=DETAIL

# NOTE(review): DATETIME is not referenced anywhere else in this script.
DATETIME=$(date +'%y-%m-%d-%H-%M-%S')

# Shared filesystem locations used by the environment, data and model paths.
readonly WORKSPACE_ROOT=/mnt/bn/tns-algo-video-public-my2/wangpeng.an
readonly DATA_ROOT="${WORKSPACE_ROOT}/data"
readonly STAGE2_DATA="${DATA_ROOT}/stage2"
readonly MODEL_ROOT="${WORKSPACE_ROOT}/model"
readonly PRETRAINED_ROOT="${MODEL_ROOT}/pretrained_model"

# Setting for multi nodes training.
# Fail early with a clear message instead of handing empty values to torchrun.
for required_var in ARNOLD_WORKER_NUM ARNOLD_ID ARNOLD_WORKER_GPU \
  METIS_WORKER_0_HOST METIS_WORKER_0_PORT; do
  [[ -n "${!required_var:-}" ]] || die "${required_var} must be set and non-empty"
done

# METIS_WORKER_0_PORT is a comma-separated list; only the first port is used.
IFS=',' read -r -a ports <<< "${METIS_WORKER_0_PORT}"
port=${ports[0]}
[[ -n "${port}" ]] || die "could not parse a port from METIS_WORKER_0_PORT='${METIS_WORKER_0_PORT}'"

echo "total workers: ${ARNOLD_WORKER_NUM}"
echo "cur worker id: ${ARNOLD_ID}"
echo "gpus per worker: ${ARNOLD_WORKER_GPU}"
echo "master ip: ${METIS_WORKER_0_HOST}"
echo "master port: ${port}"

# Activate the conda environment named "multimodal".
# shellcheck disable=SC1091  # activate script lives on the shared filesystem
source "${WORKSPACE_ROOT}/environment/anaconda3/bin/activate" multimodal \
  || die "failed to activate conda environment 'multimodal'"

# All relative paths below (scripts/, checkpoints/, llava/) resolve against
# this directory, so a failed cd must stop the run.
cd "${MODEL_ROOT}/OmniFusion-main" \
  || die "cannot cd to ${MODEL_ROOT}/OmniFusion-main"

# Install necessary packages.
# Best effort: a failed install is reported but does not abort the run.
for pkg in requests attrs aiofiles pynvml; do
  pip3 install "${pkg}" || warn "pip3 install ${pkg} failed; continuing"
done

# Print Python executable path, torchrun, deepspeed and PYTHONPATH
echo "Python executable: $(command -v python)"
echo "torchrun executable: $(command -v torchrun)"
echo "deepspeed executable: $(command -v deepspeed)"
echo "PYTHONPATH before torchrun: $PYTHONPATH"

# NOTE(review): mode 777 makes the whole tree world-writable; confirm this is
# really required. Kept best effort, as in the original.
sudo chmod -R 777 /var/lib/fastrak \
  || warn "chmod of /var/lib/fastrak failed; continuing"

# Arguments forwarded to llava/train/train_mem.py.
train_args=(
  --deepspeed ./scripts/zero2.json
  --model_name_or_path ./checkpoints/OmniFusion-8B
  --version llama_3_1

  # Training data and per-modality media folders.
  --data_path "${STAGE2_DATA}/stage2_new_1016.json"
  --audio_asr_folder "${DATA_ROOT}"
  --audio_caption_folder "${STAGE2_DATA}/audio_caption_data_tune/audio_caption_tune/audio_caption"
  --video_folder "${STAGE2_DATA}/videos_images_tune/video_images_tune/videos_images_tune"
  --image_folder "${STAGE2_DATA}/videos_images_tune/video_images_tune/videos_images_tune"
  --X "Audio_asr" "Audio_caption" "Video" "Image"

  # Modality encoders ("towers") and the multimodal projector.
  --audio_tower "${MODEL_ROOT}/LanguageBind_Audio_Asr"
  --audio_caption_tower "${PRETRAINED_ROOT}/LanguageBind_Audio"
  --video_tower "${PRETRAINED_ROOT}/LanguageBind_Video_merge"
  --image_tower "${PRETRAINED_ROOT}/LanguageBind_Image"
  --pretrain_mm_mlp_adapter checkpoints/Video-LLaVA-Pretrain-7B/checkpoint-4000/mm_projector.bin
  --mm_projector_type mlp2x_gelu
  --mm_vision_select_layer -2
  --mm_use_x_start_end False
  --mm_use_x_patch_token False
  --image_aspect_ratio pad
  --group_by_modality_length True
  --tune_mm_mlp_adapter False

  # Precision, output location and optimisation schedule.
  --bf16 True
  # NOTE(review): the output directory says "stage3" although this is the
  # stage-2 script; confirm the intended name.
  --output_dir ./checkpoints/OmniFusion-8B-stage3-1017
  --num_train_epochs 1
  --per_device_train_batch_size 8
  --per_device_eval_batch_size 4
  --gradient_accumulation_steps 1
  --evaluation_strategy "no"
  --save_strategy "steps"
  --save_steps 3000
  --save_total_limit 4
  --learning_rate 2e-5
  --weight_decay 0.
  --warmup_ratio 0.03
  --lr_scheduler_type "cosine"
  --logging_steps 1
  --tf32 True
  --model_max_length 2048
  --tokenizer_model_max_length 3072
  --gradient_checkpointing True
  --dataloader_num_workers 8
  --lazy_preprocess True
  --report_to none
  --cache_dir "./cache_dir"
)

# Launch training with DeepSpeed and torchrun
ACCELERATE_CPU_AFFINITY=1 torchrun \
  --nproc_per_node="${ARNOLD_WORKER_GPU}" \
  --nnodes="${ARNOLD_WORKER_NUM}" \
  --node_rank="${ARNOLD_ID}" \
  --master_addr="${METIS_WORKER_0_HOST}" \
  --master_port="${port}" \
  llava/train/train_mem.py \
  "${train_args[@]}"