adeebDkheel committed
Commit 0ffc6a0 · verified · Parent: cf6b738

support 'cpu'

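The change is mechanical: every hard-coded `.cuda()` transfer in modeling_GOT.py becomes `.to(self.device)`, and the explicit `.half()` casts on the image tensors are dropped (presumably because float16 kernels are only partially supported on CPU), so inference no longer assumes a CUDA device. `self.device` here is the property that `transformers.PreTrainedModel` (an ancestor of `GOTQwenForCausalLM`) exposes to report where the weights live, so inputs simply follow the model. A minimal sketch of the pattern (`TinyOCRHead` and its shapes are hypothetical, for illustration only):

```python
import torch
import torch.nn as nn

class TinyOCRHead(nn.Module):
    """Hypothetical module showing device-agnostic input handling."""

    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(16, 4)

    @property
    def device(self):
        # transformers.PreTrainedModel ships a .device property; for a
        # plain nn.Module we derive it from the first parameter instead.
        return next(self.parameters()).device

    def forward(self, x):
        # .to(self.device) is a no-op when x is already co-located, and
        # replaces the hard-coded .cuda() of the original code.
        return self.proj(x.to(self.device))

model = TinyOCRHead()          # weights live on CPU by default
x = torch.randn(2, 16)
print(model(x).shape)          # runs on CPU as-is
if torch.cuda.is_available():
    model.cuda()
    print(model(x).shape)      # same call, now on GPU
```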
Files changed (1): modeling_GOT.py (+15 -15)
modeling_GOT.py CHANGED

@@ -558,7 +558,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
         image_tensor_1 = image_processor_high(image)
 
-        input_ids = torch.as_tensor(inputs.input_ids).cuda()
+        input_ids = torch.as_tensor(inputs.input_ids).to(self.device)
 
         stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
         keywords = [stop_str]
@@ -569,7 +569,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
             with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cuda()],
+                    images=[image_tensor_1.unsqueeze(0).to(self.device)],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
@@ -581,7 +581,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
             with torch.autocast("cuda", dtype=torch.bfloat16):
                 output_ids = self.generate(
                     input_ids,
-                    images=[image_tensor_1.unsqueeze(0).half().cuda()],
+                    images=[image_tensor_1.unsqueeze(0).to(self.device)],
                     do_sample=False,
                     num_beams = 1,
                     no_repeat_ngram_size = 20,
@@ -589,9 +589,9 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     max_new_tokens=4096,
                     stopping_criteria=[stopping_criteria]
                     )
-
+
         outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
-
+
         if outputs.endswith(stop_str):
             outputs = outputs[:-len(stop_str)]
         outputs = outputs.strip()
@@ -616,7 +616,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
             if ocr_type == 'format' and '**kern' not in outputs:
 
-
+
                 if '\\begin{tikzpicture}' not in outputs:
                     html_path_2 = save_render_file
                     right_num = outputs.count('\\right')
@@ -631,8 +631,8 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     outputs_list = outputs.split('\n')
                     gt= ''
                     for out in outputs_list:
-                        gt += '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
-
+                        gt += '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
+
                     gt = gt[:-2]
 
 
@@ -652,7 +652,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                             out = out[:-1]
                         if out is None:
                             break
-
+
                         if out:
                             if out[-1] != ';':
                                 gt += out[:-1] + ';\n'
@@ -671,7 +671,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         return response_str
 
     def dynamic_preprocess(self, image, min_num=1, max_num=6, image_size=1024, use_thumbnail=True):
-
+
         def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
             best_ratio_diff = float('inf')
             best_ratio = (1, 1)
@@ -687,7 +687,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                     best_ratio = ratio
            # print(f'width: {width}, height: {height}, best_ratio: {best_ratio}')
            return best_ratio
-
+
        orig_width, orig_height = image.size
        aspect_ratio = orig_width / orig_height
 
@@ -785,7 +785,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
 
        if use_im_start_end:
-           qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_PATCH_TOKEN*image_token_len*ll + DEFAULT_IM_END_TOKEN + '\n' + qs
+           qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_PATCH_TOKEN*image_token_len*ll + DEFAULT_IM_END_TOKEN + '\n' + qs
        else:
            qs = DEFAULT_IMAGE_TOKEN + '\n' + qs
 
@@ -812,7 +812,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
 
        inputs = tokenizer([prompt])
 
-       input_ids = torch.as_tensor(inputs.input_ids).cuda()
+       input_ids = torch.as_tensor(inputs.input_ids).to(self.device)
 
        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
        keywords = [stop_str]
@@ -823,7 +823,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
            with torch.autocast("cuda", dtype=torch.bfloat16):
                output_ids = self.generate(
                    input_ids,
-                   images=[image_list.half().cuda()],
+                   images=[image_list.to(self.device)],
                    do_sample=False,
                    num_beams = 1,
                    # no_repeat_ngram_size = 20,
@@ -835,7 +835,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
            with torch.autocast("cuda", dtype=torch.bfloat16):
                output_ids = self.generate(
                    input_ids,
-                   images=[image_list.half().cuda()],
+                   images=[image_list.to(self.device)],
                    do_sample=False,
                    num_beams = 1,
                    # no_repeat_ngram_size = 20,
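Two details worth noting. First, the hunks whose removed and added lines look identical appear to differ only in trailing whitespace, which is why the diff still totals +15 -15. Second, the `torch.autocast("cuda", dtype=torch.bfloat16)` wrappers are left untouched; on a machine without CUDA, PyTorch disables that autocast context (with a warning), so generation runs in the model's default dtype. With the patch applied, a CPU-only run looks roughly like the sketch below; it assumes the upstream GOT-OCR2.0 `chat()` entry point, and the repo id and image path are placeholders:

```python
from transformers import AutoModel, AutoTokenizer

MODEL_ID = "your-namespace/GOT-OCR2_0"  # placeholder repo id

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model = model.eval()  # note: no .cuda(); the patched code moves
                      # inputs to model.device, wherever that is

# chat() is defined in modeling_GOT.py and loaded via trust_remote_code
res = model.chat(tokenizer, "page.png", ocr_type="ocr")
print(res)
```

The same script runs unchanged on GPU after `model = model.cuda()`, since every input now follows `self.device` instead of assuming CUDA.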