Mageia committed on
Commit
62c2edb
·
unverified ·
1 Parent(s): 61a37e8

fix: process pdf once

Browse files
Files changed (1) hide show
  1. app.py +40 -6
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import gradio as gr
2
  import spaces
3
  import torch
@@ -12,15 +15,35 @@ model = model.eval().to(device)
12
 
13
 
14
  @spaces.GPU()
15
- def ocr_process(image):
16
  if image is None:
17
- return "错误:未提供图片"
18
 
19
  try:
20
- res = model.chat(tokenizer, image, ocr_type="ocr")
21
- return res
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  except Exception as e:
23
- return f"错误: {str(e)}"
24
 
25
 
26
  with gr.Blocks() as demo:
@@ -29,11 +52,22 @@ with gr.Blocks() as demo:
29
  with gr.Row():
30
  image_input = gr.Image(type="filepath", label="上传图片")
31
 
 
 
 
 
 
 
 
 
 
 
32
  submit_button = gr.Button("开始OCR识别")
33
 
34
  output_text = gr.Textbox(label="识别结果")
 
35
 
36
- submit_button.click(ocr_process, inputs=[image_input], outputs=[output_text])
37
 
38
  if __name__ == "__main__":
39
  demo.launch()
 
1
+ import base64
2
+ import os
3
+
4
  import gradio as gr
5
  import spaces
6
  import torch
 
15
 
16
 
17
  @spaces.GPU()
18
def ocr_process(image, got_mode, ocr_color="", ocr_box=""):
    """Run OCR on an image file using the selected mode.

    The mode label is routed by substring: "plain" or "format" selects the
    output type, and "multi-crop" selects ``model.chat_crop`` instead of
    ``model.chat``.

    Args:
        image: Path of the image to process (the UI supplies a filepath),
            or None when nothing was uploaded.
        got_mode: Mode label chosen in the UI, e.g. "plain texts OCR".
        ocr_color: Forwarded to ``model.chat`` as ``ocr_color``.
        ocr_box: Forwarded to ``model.chat`` as ``ocr_box``.

    Returns:
        A ``(text, encoded_html)`` tuple. ``text`` is the model output or an
        error message. ``encoded_html`` is the base64-encoded rendered HTML
        for "format" modes when the render file exists, otherwise None.
    """
    if image is None:
        return "错误:未提供图片", None

    # A cleared dropdown yields None/""; report it as an unknown mode instead
    # of letting the substring tests below raise a TypeError.
    if not got_mode:
        return "错误: 未知的OCR模式", None

    try:
        image_path = image
        result_path = f"{os.path.splitext(image_path)[0]}_result.html"
        use_multi_crop = "multi-crop" in got_mode

        if "plain" in got_mode:
            if use_multi_crop:
                res = model.chat_crop(tokenizer, image_path, ocr_type="ocr")
            else:
                res = model.chat(
                    tokenizer,
                    image_path,
                    ocr_type="ocr",
                    ocr_box=ocr_box,
                    ocr_color=ocr_color,
                )
            return res, None

        if "format" in got_mode:
            if use_multi_crop:
                res = model.chat_crop(
                    tokenizer,
                    image_path,
                    ocr_type="format",
                    render=True,
                    save_render_file=result_path,
                )
            else:
                res = model.chat(
                    tokenizer,
                    image_path,
                    ocr_type="format",
                    ocr_box=ocr_box,
                    ocr_color=ocr_color,
                    render=True,
                    save_render_file=result_path,
                )

            # The render file may be missing; keep the recognised text rather
            # than reporting a misleading "unknown mode" error.
            if not os.path.exists(result_path):
                return res, None

            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's default locale encoding.
            with open(result_path, "r", encoding="utf-8") as f:
                html_content = f.read()

            # NOTE(review): this returns bare base64 text. If the consumer is
            # a gr.HTML component it will not render as a page unless wrapped
            # (e.g. in an iframe with a data: URL) - confirm intended use.
            encoded_html = base64.b64encode(html_content.encode("utf-8")).decode("utf-8")
            return res, encoded_html

        return "错误: 未知的OCR模式", None
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"错误: {str(e)}", None
47
 
48
 
49
  with gr.Blocks() as demo:
 
52
  with gr.Row():
53
  image_input = gr.Image(type="filepath", label="上传图片")
54
 
55
+ got_mode = gr.Dropdown(
56
+ choices=["plain texts OCR", "format texts OCR", "plain multi-crop OCR", "format multi-crop OCR", "plain fine-grained OCR", "format fine-grained OCR"],
57
+ label="OCR模式",
58
+ value="plain texts OCR",
59
+ )
60
+
61
+ with gr.Row():
62
+ ocr_color = gr.Textbox(label="OCR颜色 (仅用于fine-grained模式)")
63
+ ocr_box = gr.Textbox(label="OCR边界框 (仅用于fine-grained模式)")
64
+
65
  submit_button = gr.Button("开始OCR识别")
66
 
67
  output_text = gr.Textbox(label="识别结果")
68
+ output_html = gr.HTML(label="格式化结果")
69
 
70
+ submit_button.click(ocr_process, inputs=[image_input, got_mode, ocr_color, ocr_box], outputs=[output_text, output_html])
71
 
72
  if __name__ == "__main__":
73
  demo.launch()