multitensor committed on
Commit
67ea55d
·
verified ·
1 Parent(s): faa8402

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -8,6 +8,8 @@
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
 
11
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
@@ -33,3 +35,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mds filter=lfs diff=lfs merge=lfs -text
13
  *.mlmodel filter=lfs diff=lfs merge=lfs -text
14
  *.model filter=lfs diff=lfs merge=lfs -text
15
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
35
  *.zip filter=lfs diff=lfs merge=lfs -text
36
  *.zst filter=lfs diff=lfs merge=lfs -text
37
  *tfevents* filter=lfs diff=lfs merge=lfs -text
38
+ # Audio files - uncompressed
39
+ *.pcm filter=lfs diff=lfs merge=lfs -text
40
+ *.sam filter=lfs diff=lfs merge=lfs -text
41
+ *.raw filter=lfs diff=lfs merge=lfs -text
42
+ # Audio files - compressed
43
+ *.aac filter=lfs diff=lfs merge=lfs -text
44
+ *.flac filter=lfs diff=lfs merge=lfs -text
45
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
46
+ *.ogg filter=lfs diff=lfs merge=lfs -text
47
+ *.wav filter=lfs diff=lfs merge=lfs -text
48
+ # Image files - uncompressed
49
+ *.bmp filter=lfs diff=lfs merge=lfs -text
50
+ *.gif filter=lfs diff=lfs merge=lfs -text
51
+ *.png filter=lfs diff=lfs merge=lfs -text
52
+ *.tiff filter=lfs diff=lfs merge=lfs -text
53
+ # Image files - compressed
54
+ *.jpg filter=lfs diff=lfs merge=lfs -text
55
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
56
+ *.webp filter=lfs diff=lfs merge=lfs -text
57
+ # Video files - compressed
58
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
59
+ *.webm filter=lfs diff=lfs merge=lfs -text
60
+ *.json filter=lfs diff=lfs merge=lfs -text
finetune_all_stage4.sh ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Fine-tune a multimodal model (stage 4).
4
+ export OMP_NUM_THREADS=8
5
+ export NCCL_IB_DISABLE=0
6
+ export NCCL_IB_GID_INDEX=3
7
+ export NCCL_SOCKET_IFNAME=eth0
8
+ export NCCL_DEBUG=INFO
9
+ export NCCL_IB_SL=1
10
+ export CUDA_DEVICE_MAX_CONNECTIONS=1
11
+ export NCCL_BLOCKING_WAIT=1
12
+ export NCCL_ASYNC_ERROR_HANDLING=1
13
+ export NCCL_TIMEOUT=500
14
+ export TORCH_DISTRIBUTED_DEBUG=DETAIL
15
+ export NCCL_SOCKET_FAMILY=AF_INET6
16
+ export NCCL_NET_PLUGIN=libnccl-net-gcp-fastrak.so
17
+
18
+ DATETIME=`date +'%y-%m-%d-%H-%M-%S'`
19
+
20
+ # Settings for multi-node training.
21
+ ports=(`echo $METIS_WORKER_0_PORT | tr ',' ' '`)
22
+ port=${ports[0]}
23
+
24
+ echo "total workers: ${ARNOLD_WORKER_NUM}"
25
+ echo "cur worker id: ${ARNOLD_ID}"
26
+ echo "gpus per worker: ${ARNOLD_WORKER_GPU}"
27
+ echo "master ip: ${METIS_WORKER_0_HOST}"
28
+ echo "master port: ${port}"
29
+
30
+ source /mnt/bn/tns-algo-video-public-my2/wangpeng.an/environment/anaconda3/bin/activate vicuna16k
31
+
32
+ cd /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/OmniFusion-vicuna16k
33
+
34
+ # Install necessary packages
35
+ pip3 install requests
36
+ pip3 install attrs
37
+ pip3 install aiofiles
38
+ pip3 install pynvml
39
+
40
+
41
+ # Print the paths of the python, torchrun and deepspeed executables, and the current PYTHONPATH.
42
+ echo "Python executable: $(which python)"
43
+ echo "torchrun executable: $(which torchrun)"
44
+ echo "deepspeed executable: $(which deepspeed)"
45
+ echo "PYTHONPATH before torchrun: $PYTHONPATH"
46
+
47
+ sudo chmod 777 /var/lib/fastrak -R
48
+
49
+ # Launch training with DeepSpeed and torchrun
50
+ ACCELERATE_CPU_AFFINITY=1 torchrun --nproc_per_node=$ARNOLD_WORKER_GPU --nnodes=$ARNOLD_WORKER_NUM --node_rank=$ARNOLD_ID --master_addr=$METIS_WORKER_0_HOST --master_port=$port \
51
+ llava/train/train_mem.py \
52
+ --deepspeed ./scripts/zero2.json \
53
+ --model_name_or_path /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/hf/vicuna_omnis2_light_next \
54
+ --version v1 \
55
+ --data_path /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/hf/stage4_next_json/video_stag4_0116_next.json \
56
+ --audio_asr_folder /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data \
57
+ --audio_caption_folder /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data/audio_caption \
58
+ --video_folder /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data/video \
59
+ --image_folder /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data/video/Video-LLaVA \
60
+ --X "Audio_asr" "Audio_caption" "Video" "Image" \
61
+ --audio_tower /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/LanguageBind_Audio_Asr \
62
+ --audio_caption_tower /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/pretrained_model/LanguageBind_Audio \
63
+ --video_tower /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/pretrained_model/LanguageBind_Video_merge \
64
+ --image_tower /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/pretrained_model/LanguageBind_Image \
65
+ --pretrain_mm_mlp_adapter /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/OmniFusion-vicuna16k/checkpoints/Video-LLaVA-Pretrain-7B-0106/mm_projector.bin \
66
+ --mm_projector_type mlp2x_gelu \
67
+ --mm_vision_select_layer -2 \
68
+ --mm_use_x_start_end False \
69
+ --mm_use_x_patch_token False \
70
+ --image_aspect_ratio pad \
71
+ --group_by_modality_length True \
72
+ --bf16 True \
73
+ --output_dir ./checkpoints/OmniFusion-vicuna-stage4 \
74
+ --num_train_epochs 1 \
75
+ --per_device_train_batch_size 4 \
76
+ --per_device_eval_batch_size 4 \
77
+ --gradient_accumulation_steps 1 \
78
+ --evaluation_strategy "no" \
79
+ --save_strategy "steps" \
80
+ --save_steps 3000 \
81
+ --save_total_limit 4 \
82
+ --learning_rate 2e-5 \
83
+ --weight_decay 0. \
84
+ --warmup_ratio 0.03 \
85
+ --lr_scheduler_type "cosine" \
86
+ --logging_steps 1 \
87
+ --tf32 True \
88
+ --model_max_length 2048 \
89
+ --tokenizer_model_max_length 3072 \
90
+ --gradient_checkpointing True \
91
+ --dataloader_num_workers 8 \
92
+ --lazy_preprocess True \
93
+ --report_to none \
94
+ --cache_dir "./cache_dir"
video_stag4_0116_next.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1866c5796ced730a3855bace0d3520213639cd7ee561b44b1fdaa1e204ef966c
3
+ size 423109312