Upload folder using huggingface_hub
- README.md +10 -0
- modeling_internvl_chat.py +2 -1
README.md
CHANGED
@@ -112,6 +112,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 ```

@@ -126,6 +127,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     load_in_8bit=True,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval()
 ```

@@ -172,6 +174,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True,
     device_map=device_map).eval()
 ```
@@ -189,6 +192,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -216,6 +220,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -242,6 +247,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -275,6 +281,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -313,6 +320,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -350,6 +358,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -420,6 +429,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

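All ten README hunks make the same change: the new `use_flash_attn=True` flag is passed to `AutoModel.from_pretrained`. For reference, here is a consolidated sketch of the updated loading pattern; the repo id assigned to `path` is a placeholder for whichever InternVL checkpoint this README documents.

```python
# Minimal sketch of the updated loading call from the README.
import torch
from transformers import AutoModel, AutoTokenizer

path = 'OpenGVLab/InternVL-Chat-V1-5'  # assumption: substitute the actual repo id
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=True,  # new in this commit
    trust_remote_code=True).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
```

Because of the guard added in `modeling_internvl_chat.py` below, passing `use_flash_attn=True` is safe even on machines without the `flash-attn` package: the request is silently downgraded to eager attention instead of failing at load time.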
modeling_internvl_chat.py
CHANGED
@@ -17,7 +17,7 @@ from transformers.utils import ModelOutput, logging

 from .configuration_internvl_chat import InternVLChatConfig
 from .conversation import get_conv_template
-from .modeling_intern_vit import InternVisionModel
+from .modeling_intern_vit import InternVisionModel, has_flash_attn

 logger = logging.get_logger(__name__)

@@ -48,6 +48,7 @@ class InternVLChatModel(PreTrainedModel):
         self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
         self.downsample_ratio = config.downsample_ratio
         self.ps_version = config.ps_version
+        use_flash_attn = use_flash_attn if has_flash_attn else False
         config.vision_config.use_flash_attn = True if use_flash_attn else False
         config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'

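The new guard relies on a `has_flash_attn` flag exported by `modeling_intern_vit.py`, which is not part of this diff. In InternVL-style vision modules that flag is typically set by a guarded import, roughly as in this sketch (an assumption about code outside the diff; only the exported boolean matters here):

```python
# Hypothetical sketch of the has_flash_attn probe in modeling_intern_vit.py.
try:
    from flash_attn.bert_padding import pad_input, unpad_input
    from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func
    has_flash_attn = True
except ImportError:
    # flash-attn is optional; without it the model falls back to eager attention
    has_flash_attn = False
```

With this probe in place, `use_flash_attn = use_flash_attn if has_flash_attn else False` turns a user's flash-attention request into a no-op on machines where the package is missing, so both the vision tower and the LLM cleanly select the eager attention path.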