Spaces:

Mageia
/

GOT-OCR-Optimize

Running

App Files Community

Mageia committed on Oct 16, 2024

Commit

62c2edb

unverified ·

1 Parent(s): 61a37e8

fix: process pdf once

Browse files

Files changed (1) hide show

app.py +40 -6

app.py CHANGED Viewed

@@ -1,3 +1,6 @@
 import gradio as gr
 import spaces
 import torch
@@ -12,15 +15,35 @@ model = model.eval().to(device)
 @spaces.GPU()
-def ocr_process(image):
     if image is None:
-        return "错误:未提供图片"
     try:
-        res = model.chat(tokenizer, image, ocr_type="ocr")
-        return res
     except Exception as e:
-        return f"错误: {str(e)}"
 with gr.Blocks() as demo:
@@ -29,11 +52,22 @@ with gr.Blocks() as demo:
     with gr.Row():
         image_input = gr.Image(type="filepath", label="上传图片")
     submit_button = gr.Button("开始OCR识别")
     output_text = gr.Textbox(label="识别结果")
-    submit_button.click(ocr_process, inputs=[image_input], outputs=[output_text])
 if __name__ == "__main__":
     demo.launch()

+import base64
+import os
 import gradio as gr
 import spaces
 import torch
 @spaces.GPU()
+def ocr_process(image, got_mode, ocr_color="", ocr_box=""):
+    """Run the OCR model on an image and return ``(text, rendered_html)``.
+
+    Args:
+        image: Path of the image to recognise, or ``None`` when no image
+            was provided.
+        got_mode: Mode label. A label containing "plain" yields raw text;
+            one containing "format" additionally asks the model to render
+            an HTML file. "multi-crop" in the label selects
+            ``model.chat_crop`` (which is called without ``ocr_box`` /
+            ``ocr_color``); otherwise ``model.chat`` is used.
+        ocr_color: Forwarded as ``ocr_color`` to ``model.chat``.
+        ocr_box: Forwarded as ``ocr_box`` to ``model.chat``.
+
+    Returns:
+        A 2-tuple. The first element is the recognised text, or an error
+        message starting with "错误". The second element is the
+        base64-encoded content of the rendered HTML file, or ``None`` when
+        no rendering is available.
+    """
     if image is None:
+        return "错误:未提供图片", None
     try:
+        image_path = image
+        use_multi_crop = "multi-crop" in got_mode
+
+        if "plain" in got_mode:
+            if use_multi_crop:
+                res = model.chat_crop(tokenizer, image_path, ocr_type="ocr")
+            else:
+                res = model.chat(tokenizer, image_path, ocr_type="ocr", ocr_box=ocr_box, ocr_color=ocr_color)
+            return res, None
+
+        if "format" in got_mode:
+            # The rendered page is written next to the input image.
+            result_path = f"{os.path.splitext(image_path)[0]}_result.html"
+            if use_multi_crop:
+                res = model.chat_crop(tokenizer, image_path, ocr_type="format", render=True, save_render_file=result_path)
+            else:
+                res = model.chat(tokenizer, image_path, ocr_type="format", ocr_box=ocr_box, ocr_color=ocr_color, render=True, save_render_file=result_path)
+
+            if not os.path.exists(result_path):
+                # Rendering produced no file. The OCR itself succeeded, so
+                # return its text instead of falling through to the
+                # "unknown mode" error below.
+                return res, None
+
+            with open(result_path, "r") as f:
+                html_content = f.read()
+            # NOTE(review): this bare base64 string is what the caller hands
+            # to gr.HTML; it presumably needs wrapping (e.g. an <iframe> with
+            # a data: URI) to display as a page - confirm against the UI.
+            encoded_html = base64.b64encode(html_content.encode("utf-8")).decode("utf-8")
+            return res, encoded_html
+
+        # Neither "plain" nor "format" appears in the mode label.
+        return "错误: 未知的OCR模式", None
     except Exception as e:
+        return f"错误: {str(e)}", None
 with gr.Blocks() as demo:
     with gr.Row():
         image_input = gr.Image(type="filepath", label="上传图片")
+    got_mode = gr.Dropdown(
+        choices=["plain texts OCR", "format texts OCR", "plain multi-crop OCR", "format multi-crop OCR", "plain fine-grained OCR", "format fine-grained OCR"],
+        label="OCR模式",
+        value="plain texts OCR",
+    )
+    with gr.Row():
+        ocr_color = gr.Textbox(label="OCR颜色 (仅用于fine-grained模式)")
+        ocr_box = gr.Textbox(label="OCR边界框 (仅用于fine-grained模式)")
     submit_button = gr.Button("开始OCR识别")
     output_text = gr.Textbox(label="识别结果")
+    output_html = gr.HTML(label="格式化结果")
+    submit_button.click(ocr_process, inputs=[image_input, got_mode, ocr_color, ocr_box], outputs=[output_text, output_html])
 if __name__ == "__main__":
     demo.launch()