Visual Question Answering
English
HawkEye / config.json
wangyueqian's picture
upload ckpt
8a97bcf verified
raw
history blame
1.38 kB
{
"model": {
"model_cls": "HawkEye_it",
"vit_blip_model_path": "model/VideoChat2/umt_l16_qformer.pth",
"llama_model_path": "model/vicuna-7b",
"videochat2_model_path": "model/VideoChat2/videochat2_7b_stage2.pth",
"freeze_vit": true,
"freeze_qformer": false,
"max_txt_len": 512,
"low_resource": false,
"vision_encoder": {
"name": "vit_l14",
"img_size": 224,
"patch_size": 16,
"d_model": 1024,
"encoder_embed_dim": 1024,
"encoder_depth": 24,
"encoder_num_heads": 16,
"drop_path_rate": 0.0,
"num_frames": 32,
"tubelet_size": 1,
"use_checkpoint": false,
"checkpoint_num": 0,
"pretrained": "",
"return_index": -2,
"vit_add_ln": true,
"ckpt_num_frame": 4
},
"num_query_token": 32,
"qformer_hidden_dropout_prob": 0.1,
"qformer_attention_probs_dropout_prob": 0.1,
"qformer_drop_path_rate": 0.2,
"extra_num_query_token": 64,
"qformer_text_input": true,
"system": "",
"start_token": "<Video>",
"end_token": "</Video>",
"img_start_token": "<Image>",
"img_end_token": "</Image>",
"random_shuffle": true,
"use_lora": true,
"lora_r": 16,
"lora_alpha": 32,
"lora_dropout": 0.1
},
"device": "cuda"
}