Spaces:
Sleeping
Sleeping
import os | |
import uuid | |
import fitz # PyMuPDF | |
import gradio as gr | |
from PIL import Image | |
# Relative folder set aside for uploaded PDF files.
UPLOAD_FOLDER = "./uploads"
# Relative folder set aside for results; nothing in the visible code reads it.
RESULTS_FOLDER = "./results"
def pdf_to_images(pdf_path):
    """Render every page of a PDF file to a PIL image.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list holding one RGB ``PIL.Image.Image`` per page, in page order.
        An empty document yields an empty list.
    """
    # The context manager guarantees the document is closed even when a page
    # fails to render, which the previous explicit close() did not.
    with fitz.open(pdf_path) as pdf_document:
        images = []
        for page in pdf_document:
            # alpha=False keeps the sample buffer at exactly three bytes per
            # pixel, which is what the "RGB" mode passed to frombytes expects.
            pix = page.get_pixmap(alpha=False)
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
        return images
def process_pdf(pdf_file):
    """Convert an uploaded PDF into a list of page images.

    Args:
        pdf_file: Value delivered by the upload component. Accepted forms are
            a filesystem path (``str`` or ``os.PathLike``) or an object that
            exposes the path through a ``name`` attribute (temp-file
            wrappers). ``None`` means that no file is present.

    Returns:
        The list of page images produced by ``pdf_to_images``, or an empty
        list when ``pdf_file`` is ``None``.

    Raises:
        TypeError: If no filesystem path can be derived from ``pdf_file``.
    """
    if pdf_file is None:
        return []

    # The upload value is a path or a temp-file wrapper, not a web-framework
    # upload object, so there is no ``save`` method to call. The file already
    # lives on disk and can be rendered from where it is.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = getattr(pdf_file, "name", None)

    if not pdf_path:
        raise TypeError(
            f"Cannot determine a file path from upload value of type "
            f"{type(pdf_file).__name__}"
        )

    return pdf_to_images(pdf_path)
def display_images(images):
    """Build a gallery component that shows the given images.

    Args:
        images: Iterable of images to display (for example the PIL images
            returned by ``pdf_to_images``).

    Returns:
        A ``gr.Gallery`` whose value is the list of images.
    """
    # A gallery value is the list of images themselves; wrapping each one in
    # a gr.Image component is rejected when the gallery processes its value.
    return gr.Gallery(value=list(images))
def on_image_select(image):
    """Pass the selected image through unchanged.

    Args:
        image: The value to show in the selected-image viewer.

    Returns:
        The very same object that was passed in.
    """
    selected = image
    return selected
with gr.Blocks() as demo:
    # Layout: upload widget, page-preview gallery, and a viewer for the page
    # the user clicks in the gallery.
    pdf_input = gr.File(label="上传PDF文件")
    image_gallery = gr.Gallery(label="PDF页面预览", columns=3, height="auto")
    selected_image = gr.Image(label="选中的图片", type="pil")

    def _show_selected_page(pages, evt: gr.SelectData):
        """Pick the clicked gallery entry and pass it to on_image_select."""
        if not pages:
            return None
        # The select event supplies the whole gallery value as input; the
        # clicked position comes from the event data.
        entry = pages[evt.index]
        # Gallery entries can arrive as (image, caption) pairs; keep the image.
        # NOTE(review): assumes the Gradio 4+ gallery value format - confirm
        # against the installed Gradio version.
        if isinstance(entry, (tuple, list)):
            entry = entry[0]
        return on_image_select(entry)

    # Render the PDF pages into the gallery as soon as a file is uploaded.
    pdf_input.upload(fn=process_pdf, inputs=pdf_input, outputs=image_gallery)
    # Show only the clicked page in the viewer rather than the full gallery list.
    image_gallery.select(
        fn=_show_selected_page, inputs=image_gallery, outputs=selected_image
    )
    # Components and logic for the OCR conversion feature can be added here.
    # NOTE(review): no demo.launch() call is visible in this part of the file -
    # confirm the app is launched elsewhere.