Spaces:
Sleeping
Sleeping
File size: 5,601 Bytes
4dcdb35 18def71 4dcdb35 8ec1357 4dcdb35 18def71 4dcdb35 18def71 4dcdb35 8ec1357 18def71 8ec1357 18def71 1612798 18def71 8ec1357 4dcdb35 8ec1357 18def71 8ec1357 18def71 8ec1357 18def71 4dcdb35 18def71 4dcdb35 7ee1423 6d6872c 7ee1423 70b6f98 7ee1423 61f1538 6d6872c 61f1538 3db4b7a 61f1538 3db4b7a 4dcdb35 70b6f98 4dcdb35 8ec1357 86b715d aff59fd 86b715d 18def71 6d6872c 7ee1423 70b6f98 7ee1423 fffa248 7ee1423 70b6f98 61f1538 7ee1423 6d6872c 7ee1423 e9361c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import os
import shutil
import uuid
import fitz # PyMuPDF
import gradio as gr
from modelscope import AutoModel, AutoTokenizer
from PIL import Image, ImageEnhance
from got_ocr import got_ocr
# Initialize the model and tokenizer. This runs at import time.
tokenizer = AutoTokenizer.from_pretrained("stepfun-ai/GOT-OCR2_0", trust_remote_code=True)
model = AutoModel.from_pretrained("stepfun-ai/GOT-OCR2_0", trust_remote_code=True, low_cpu_mem_usage=True, device_map="cuda", use_safetensors=True)
# Switch the loaded model to eval mode and call .cuda() on it.
model = model.eval().cuda()
# Folder that receives the temporary copy of each uploaded PDF.
UPLOAD_FOLDER = "./uploads"
# Folder that receives the rendered page images.
RESULTS_FOLDER = "./results"
# Make sure the required folders exist.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(RESULTS_FOLDER, exist_ok=True)
def pdf_to_images(pdf_path, zoom=4, contrast=1.5):
    """Render every page of a PDF to a contrast-enhanced RGB PIL image.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        zoom: Scale factor applied on both axes when rasterizing a page.
            Defaults to 4, the value that used to be hard-coded.
        contrast: Factor handed to ``ImageEnhance.Contrast(...).enhance``.
            Defaults to 1.5, the value that used to be hard-coded.

    Returns:
        A list with one PIL image per page, in page order.
    """
    # The scaling matrix is the same for every page, so build it once.
    mat = fitz.Matrix(zoom, zoom)
    images = []
    pdf_document = fitz.open(pdf_path)
    try:
        for page_num in range(len(pdf_document)):
            page = pdf_document.load_page(page_num)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            images.append(ImageEnhance.Contrast(img).enhance(contrast))
    finally:
        # Close the document even when a page fails to render, so the file
        # handle is not leaked.
        pdf_document.close()
    return images
def process_pdf(pdf_file):
    """Convert an uploaded PDF into per-page PNG files.

    Args:
        pdf_file: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.

    Returns:
        A list of PNG file paths, one per page in page order, or ``None``
        when ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        return None

    # Accept both a file-like wrapper (path in ``.name``) and a bare path.
    source_path = getattr(pdf_file, "name", pdf_file)

    job_id = str(uuid.uuid4())
    temp_pdf_path = os.path.join(UPLOAD_FOLDER, f"{job_id}.pdf")
    # Work on a private copy of the upload.
    shutil.copy(source_path, temp_pdf_path)
    try:
        images = pdf_to_images(temp_pdf_path)
    finally:
        # Remove the temporary copy even if the conversion fails.
        os.remove(temp_pdf_path)

    # Give every upload its own output folder so that a later (or concurrent)
    # upload cannot overwrite page_N.png files that are still being shown.
    output_dir = os.path.join(RESULTS_FOLDER, job_id)
    os.makedirs(output_dir, exist_ok=True)

    image_paths = []
    for page_number, img in enumerate(images, start=1):
        img_path = os.path.join(output_dir, f"page_{page_number}.png")
        img.save(img_path, "PNG")
        image_paths.append(img_path)
    return image_paths
def on_image_select(evt: gr.SelectData):
    """Return the index carried by a selection event.

    Args:
        evt: The selection event; only its ``index`` attribute is read.

    Returns:
        ``evt.index``, which is ``None`` when the event carries no index.
    """
    # A missing index is already None, so it can be passed through as-is.
    return evt.index
def perform_ocr(selected_index, image_gallery, got_mode, fine_grained_type, color, box):
    """Run GOT-OCR on the selected gallery image and return markup to display.

    Args:
        selected_index: Index of the selected gallery item, or ``None`` when
            nothing has been selected.
        image_gallery: Current gallery value; each item is indexed with ``[0]``
            to obtain the image. May be ``None`` or empty.
        got_mode: OCR task name, forwarded to ``got_ocr`` as ``got_mode``.
        fine_grained_type: ``"box"``, ``"color"`` or another value; forwarded
            as ``fine_grained_mode`` and used to decide which of ``color`` and
            ``box`` is passed on.
        color: Color name, used only when ``fine_grained_type == "color"``.
        box: Box string, used only when ``fine_grained_type == "box"``.

    Returns:
        A string: the "select an image first" message when there is no valid
        selection, a download link plus an inline iframe when ``got_ocr``
        returns HTML content, or the plain OCR result otherwise.
    """
    # The gallery value can be None (nothing uploaded yet), so test its
    # truthiness instead of calling len() on it.
    if selected_index is None or not image_gallery:
        return "请先选择一张图片"
    # The stored index may be stale after a shorter PDF replaced the previous
    # one; treat an out-of-range index like a missing selection.
    if not 0 <= selected_index < len(image_gallery):
        return "请先选择一张图片"

    selected_image = image_gallery[selected_index][0]

    # Only forward the option that belongs to the chosen fine-grained type.
    ocr_color = color if fine_grained_type == "color" else ""
    ocr_box = box if fine_grained_type == "box" else ""

    result, html_content = got_ocr(
        model,
        tokenizer,
        selected_image,
        got_mode=got_mode,
        fine_grained_mode=fine_grained_type,
        ocr_color=ocr_color,
        ocr_box=ocr_box,
    )

    # Always return a plain string so every code path yields the same type.
    if not html_content:
        return result

    # html_content is embedded as the base64 payload of a data URI, shared by
    # the inline preview and the download link.
    data_uri = f"data:text/html;base64,{html_content}"
    iframe = f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'
    download_link = f'<a href="{data_uri}" download="result.html">下载完整结果</a>'
    return f"{download_link}\n\n{iframe}"
def task_update(task):
    """Build visibility updates for the three fine-grained controls.

    Args:
        task: The selected task name.

    Returns:
        Three ``gr.update`` objects. The first is visible only when ``task``
        contains "fine-grained"; the second and third are always hidden.
    """
    show_fine_grained = "fine-grained" in task
    return [
        gr.update(visible=show_fine_grained),
        gr.update(visible=False),
        gr.update(visible=False),
    ]
def fine_grained_update(fine_grained_type):
    """Build visibility updates for the color and box controls.

    Args:
        fine_grained_type: The selected fine-grained type.

    Returns:
        Two ``gr.update`` objects: the first visible only for ``"color"``,
        the second visible only for ``"box"``. Any other value hides both.
    """
    wants_color = fine_grained_type == "color"
    wants_box = fine_grained_type == "box"
    return [gr.update(visible=wants_color), gr.update(visible=wants_box)]
with gr.Blocks() as demo:
    # --- PDF upload and page preview -------------------------------------
    pdf_input = gr.File(label="上传PDF文件")
    image_gallery = gr.Gallery(
        label="PDF页面预览",
        columns=3,
        height=600,
        object_fit="contain",
        preview=True,
        show_download_button=True,  # show the download button
        show_fullscreen_button=True,  # show the fullscreen button
        allow_preview=True,  # allow preview (including fullscreen viewing)
    )
    # Holds the index of the gallery item selected most recently.
    selected_index = gr.State(None)

    # Uploading a PDF fills the gallery; selecting an item stores its index.
    pdf_input.upload(fn=process_pdf, inputs=pdf_input, outputs=image_gallery)
    image_gallery.select(fn=on_image_select, inputs=[], outputs=selected_index)

    # --- OCR options -----------------------------------------------------
    task_dropdown = gr.Dropdown(
        choices=[
            "plain texts OCR",
            "format texts OCR",
            "plain multi-crop OCR",
            "format multi-crop OCR",
            "plain fine-grained OCR",
            "format fine-grained OCR",
        ],
        label="选择GOT模式",
        value="format texts OCR",
    )
    # The fine-grained controls start hidden and are toggled by the change
    # handlers wired below.
    fine_grained_dropdown = gr.Dropdown(
        choices=["box", "color"],
        label="fine-grained类型",
        visible=False,
    )
    color_dropdown = gr.Dropdown(
        choices=["red", "green", "blue"],
        label="颜色列表",
        visible=False,
    )
    box_input = gr.Textbox(
        label="输入框: [x1,y1,x2,y2]",
        placeholder="例如: [0,0,100,100]",
        visible=False,
    )

    # --- Run button and result area --------------------------------------
    ocr_button = gr.Button("开始OCR识别")
    ocr_result = gr.HTML(label="OCR结果")  # HTML component rather than a Textbox

    task_dropdown.change(
        fn=task_update,
        inputs=[task_dropdown],
        outputs=[fine_grained_dropdown, color_dropdown, box_input],
    )
    fine_grained_dropdown.change(
        fn=fine_grained_update,
        inputs=[fine_grained_dropdown],
        outputs=[color_dropdown, box_input],
    )

    # Pass every value perform_ocr needs when the button is clicked.
    ocr_button.click(
        fn=perform_ocr,
        inputs=[
            selected_index,
            image_gallery,
            task_dropdown,
            fine_grained_dropdown,
            color_dropdown,
            box_input,
        ],
        outputs=ocr_result,
    )

if __name__ == "__main__":
    demo.launch()
|