Mageia's picture
add: pdf2images
8ec1357 unverified
raw
history blame
1.38 kB
import os
import uuid
import fitz # PyMuPDF
import gradio as gr
from PIL import Image
# Directory where process_pdf writes the temporary copy of an uploaded PDF.
UPLOAD_FOLDER = "./uploads"
# NOTE(review): not referenced by the code visible here — presumably reserved
# for generated output (e.g. the planned OCR step); confirm before relying on it.
RESULTS_FOLDER = "./results"
def pdf_to_images(pdf_path):
    """Render every page of a PDF into a PIL image.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list of RGB ``PIL.Image.Image`` objects, one per page, in page
        order. The list is empty for a document without pages.

    Raises:
        Whatever ``fitz.open`` or page rendering raises for an unreadable or
        corrupt file; the document handle is still released in that case.
    """
    images = []
    # The context manager closes the document even when rendering a page
    # fails, so a bad page no longer leaks the open file handle.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            # Request a pixmap without an alpha channel so the raw sample
            # buffer is exactly 3 bytes per pixel, matching the "RGB" mode.
            pix = page.get_pixmap(alpha=False)
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
    return images
def process_pdf(pdf_file):
    """Convert an uploaded PDF into a list of page images.

    Args:
        pdf_file: The uploaded file. Accepted forms:
            * ``None`` (nothing uploaded) -> an empty list is returned;
            * a path (``str`` / ``os.PathLike``) to the PDF on disk;
            * an object exposing ``.name`` with the on-disk path, such as a
              temporary-file wrapper;
            * an object exposing ``.save(path)``, which is copied into
              ``UPLOAD_FOLDER`` first (the original behaviour).
            NOTE(review): which of these Gradio passes depends on the
            installed version and the ``gr.File`` ``type`` setting — confirm.

    Returns:
        The list of images produced by ``pdf_to_images``.
    """
    if pdf_file is None:
        return []

    if hasattr(pdf_file, "save"):
        # Legacy route: persist the upload under a collision-free name.
        os.makedirs(UPLOAD_FOLDER, exist_ok=True)
        temp_pdf_path = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4()}.pdf")
        try:
            pdf_file.save(temp_pdf_path)
            return pdf_to_images(temp_pdf_path)
        finally:
            # Always drop the temporary copy, even if saving or rendering
            # failed part-way through.
            if os.path.exists(temp_pdf_path):
                os.remove(temp_pdf_path)

    # The upload already lives on disk, so render it in place instead of
    # copying it; the file is not ours to delete.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name
    return pdf_to_images(pdf_path)
def display_images(images):
    """Build a gallery component that shows the given images.

    Args:
        images: Iterable of images (e.g. the PIL images returned by
            ``pdf_to_images``). ``None`` is treated as "no images".

    Returns:
        A ``gr.Gallery`` whose value is the list of images.
    """
    # Hand the images to the gallery directly. Wrapping each one in its own
    # gr.Image component is not a valid gallery value and would also create
    # extra components as a side effect.
    return gr.Gallery(value=list(images or []))
def on_image_select(image, evt: "gr.SelectData" = None):
    """Return the image that should be shown after a gallery selection.

    Args:
        image: Either a single image, or the full gallery value (a sequence
            of images or ``(image, caption)`` pairs) when the handler is
            wired with the gallery itself as input.
        evt: Selection event data. Gradio fills this in automatically for
            parameters annotated with ``gr.SelectData``; ``evt.index`` is
            used as the position of the clicked item. Optional, so direct
            one-argument calls keep working.

    Returns:
        ``image`` unchanged when no event data is available or ``image`` is
        not a sequence; otherwise the selected entry, with an
        ``(image, caption)`` pair reduced to its image.
    """
    # The annotation is a string so it is only resolved when type hints are
    # inspected, not when the function is defined.
    if evt is None or not isinstance(image, (list, tuple)):
        return image

    chosen = image[evt.index]
    # Gallery entries may be (image, caption) pairs; the preview only needs
    # the image part.
    if isinstance(chosen, (list, tuple)):
        chosen = chosen[0]
    return chosen
# Assemble the UI: a PDF upload box, a gallery of the rendered pages, and a
# preview slot for a single image.
with gr.Blocks() as demo:
    pdf_input = gr.File(label="上传PDF文件")
    image_gallery = gr.Gallery(label="PDF页面预览", columns=3, height="auto")
    selected_image = gr.Image(label="选中的图片", type="pil")

    # Uploading a file runs process_pdf and shows its result in the gallery.
    pdf_input.upload(
        process_pdf,
        inputs=[pdf_input],
        outputs=[image_gallery],
    )
    # Selecting in the gallery runs on_image_select on the gallery value and
    # shows the result in the preview image.
    image_gallery.select(
        on_image_select,
        inputs=[image_gallery],
        outputs=[selected_image],
    )

    # Components and logic for the OCR conversion feature can be added here.