adeebDkheel committed
support 'cpu'

modeling_GOT.py  (+15 -15)
@@ -558,7 +558,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
         image_tensor_1 = image_processor_high(image)
 
-        input_ids = torch.as_tensor(inputs.input_ids).cuda()
+        input_ids = torch.as_tensor(inputs.input_ids).to(self.device)
 
         stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
         keywords = [stop_str]
@@ -569,7 +569,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
             with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cuda()],
+                    images=[image_tensor_1.unsqueeze(0).to(self.device)],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
@@ -581,7 +581,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
             with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cuda()],
+                    images=[image_tensor_1.unsqueeze(0).to(self.device)],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
@@ -589,9 +589,9 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     max_new_tokens=4096,
                     stopping_criteria=[stopping_criteria]
                     )
-
+
         outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
-
+
         if outputs.endswith(stop_str):
             outputs = outputs[:-len(stop_str)]
         outputs = outputs.strip()
@@ -616,7 +616,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
             if ocr_type == 'format' and '**kern' not in outputs:
 
-
+
                 if '\\begin{tikzpicture}' not in outputs:
                     html_path_2 = save_render_file
                     right_num = outputs.count('\\right')
@@ -631,8 +631,8 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     outputs_list = outputs.split('\n')
                     gt= ''
                     for out in outputs_list:
-                        gt += '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
-
+                        gt += '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
+
                     gt = gt[:-2]
 
 
@@ -652,7 +652,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                                     out = out[:-1]
                                     if out is None:
                                         break
-
+
                                 if out:
                                     if out[-1] != ';':
                                         gt += out[:-1] + ';\n'
@@ -671,7 +671,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         return response_str
 
     def dynamic_preprocess(self, image, min_num=1, max_num=6, image_size=1024, use_thumbnail=True):
-
+
         def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
             best_ratio_diff = float('inf')
             best_ratio = (1, 1)
@@ -687,7 +687,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                         best_ratio = ratio
            # print(f'width: {width}, height: {height}, best_ratio: {best_ratio}')
            return best_ratio
-
+
        orig_width, orig_height = image.size
        aspect_ratio = orig_width / orig_height
 
@@ -785,7 +785,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
 
        if use_im_start_end:
-            qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_PATCH_TOKEN*image_token_len*ll + DEFAULT_IM_END_TOKEN + '\n' + qs
+            qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_PATCH_TOKEN*image_token_len*ll + DEFAULT_IM_END_TOKEN + '\n' + qs
        else:
            qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
 
@@ -812,7 +812,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
        inputs = tokenizer([prompt])
 
-        input_ids = torch.as_tensor(inputs.input_ids).cuda()
+        input_ids = torch.as_tensor(inputs.input_ids).to(self.device)
 
        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
        keywords = [stop_str]
@@ -823,7 +823,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
            with torch.autocast("cuda", dtype=torch.bfloat16):
                output_ids = self.generate(
                    input_ids,
-                    images=[image_list.half().cuda()],
+                    images=[image_list.to(self.device)],
                    do_sample=False,
                    num_beams = 1,
                    # no_repeat_ngram_size = 20,
@@ -835,7 +835,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
            with torch.autocast("cuda", dtype=torch.bfloat16):
                output_ids = self.generate(
                    input_ids,
-                    images=[image_list.half().cuda()],
+                    images=[image_list.to(self.device)],
                    do_sample=False,
                    num_beams = 1,
                    # no_repeat_ngram_size = 20,
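
With the hard-coded .cuda() placements replaced by .to(self.device), chat() and chat_crop() now move input_ids and the image tensors to whatever device the model itself sits on, so the checkpoint can be run on a CPU-only machine. Below is a minimal usage sketch under that assumption; the repo id and image path are illustrative placeholders, and the loading arguments follow the upstream GOT-OCR2.0 usage.

    from transformers import AutoModel, AutoTokenizer

    repo = "your-namespace/GOT-OCR2_0"   # illustrative repo id hosting this patched modeling_GOT.py
    tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)

    # No device_map and no .cuda(): the weights stay on CPU, and the patched
    # chat()/chat_crop() send inputs to self.device automatically.
    model = AutoModel.from_pretrained(
        repo,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        use_safetensors=True,
        pad_token_id=151643,
    ).eval()
    # model = model.to("cuda")  # optional: restores the old GPU placement

    res = model.chat(tokenizer, "page.png", ocr_type="ocr")  # "page.png" is a placeholder image path
    print(res)

Note that the generate calls still open torch.autocast("cuda", dtype=torch.bfloat16); on a CPU-only run that context should leave CPU kernels unaffected, so generation simply proceeds in the dtype the weights were loaded with.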