#!/bin/bash
#SBATCH --job-name=finetune_taiyi # create a short name for your job
#SBATCH --nodes=1 # node count
#SBATCH --ntasks-per-node=8 # number of tasks to run per node
#SBATCH --cpus-per-task=30 # cpu-cores per task (>1 if multi-threaded tasks)
#SBATCH --gres=gpu:8 # number of gpus per node
#SBATCH -o %x-%j.log # output and error log file name (%x = job name, %j = job id)
#SBATCH -x dgx050 # exclude node dgx050 from the allocation

# Runs test.py on the flickr30k-CNA dataset with the checkpoint given by
# --model_path, as a single python3 process pinned to GPU 0.
#
# NOTE(review): the directives above request 8 tasks and 8 GPUs, while the
# launch below uses GPUS_PER_NODE=1 and CUDA_VISIBLE_DEVICES=0. Confirm that
# reserving the larger allocation is intentional.

# Fail on references to unset variables (catches typos in the names below).
set -u

# pwd=Fengshenbang-LM/fengshen/examples/pretrain_erlangshen
# NOTE(review): test.py is resolved relative to the current working directory;
# confirm the directory named above is the intended one.

NNODES=1
GPUS_PER_NODE=1
MICRO_BATCH_SIZE=64

# Arguments are kept in arrays so each flag/value reaches python3 as exactly
# one word, without relying on unquoted word splitting.
DATA_ARGS=(
  --test_batchsize "$MICRO_BATCH_SIZE"
  --datasets_name flickr30k-CNA
)

MODEL_ARGS=(
  --model_path /cognitive_comp/gaoxinyu/github/Fengshenbang-LM/fengshen/workspace/taiyi-clip-huge-v2/hf_out_0_661
)

TRAINER_ARGS=(
  --gpus "$GPUS_PER_NODE"
  --num_nodes "$NNODES"
  --strategy ddp
  --log_every_n_steps 0
  --default_root_dir .
  --precision 32
)

# num_sanity_val_steps, limit_val_batches: validation is turned off via these
# two parameters.
# NOTE(review): neither of them is actually passed anywhere in this script.

# Still exported as one flat, space-separated string for backward
# compatibility: the commented-out srun variant at the bottom expands $options.
export options="${DATA_ARGS[*]} ${MODEL_ARGS[*]} ${TRAINER_ARGS[*]}"

CUDA_VISIBLE_DEVICES=0 python3 test.py "${DATA_ARGS[@]}" "${MODEL_ARGS[@]}" "${TRAINER_ARGS[@]}"
#srun -N $NNODES --gres=gpu:$GPUS_PER_NODE --ntasks-per-node=$GPUS_PER_NODE --cpus-per-task=20 python3 pretrain.py $options