Spaces:

innoai
/

PDF-To-JPG-Gradio5

Running

App Files Files Community

innoai committed 7 days ago

Commit

28de931

verified ·

1 Parent(s): 3bc4b55

Delete pdf_to_jpg_gradio5_en.py

Browse files

Files changed (1) hide show

pdf_to_jpg_gradio5_en.py +0 -197

pdf_to_jpg_gradio5_en.py DELETED Viewed

@@ -1,197 +0,0 @@
-# pdf_to_jpg_gradio.py
-import gradio as gr
-import io
-import uuid
-import numpy as np
-from PIL import Image, ImageDraw
-from pdf2image import convert_from_bytes
-import zipfile
-import os
-def convert_pdf_to_combined_image(pdf_bytes, output_size="A4", dpi=96, line_color=(200, 200, 200)):
-    """
-    Render every page of a PDF and stack the pages vertically into one long image.
-
-    Args:
-        pdf_bytes:   Raw bytes of the PDF document.
-        output_size: Per-page target size: "Original", "A4" or "A3".
-                     "Original" applies the pixel size of the first rendered
-                     page to every page.
-        dpi:         Pixel density used to convert the A4/A3 millimetre
-                     dimensions into pixels (default 96).
-        line_color:  RGB colour of the separator line drawn between pages
-                     (default light grey).
-
-    Returns:
-        A PIL Image (RGB) containing all pages from top to bottom.
-    """
-    pages = convert_from_bytes(pdf_bytes)
-    page_count = len(pages)
-
-    def mm_to_px(mm):
-        # Same arithmetic order as (mm * dpi) / 25.4, truncated to whole pixels.
-        return int(mm * dpi / 25.4)
-
-    page_dimensions = {
-        "Original": pages[0].size,
-        "A4": (mm_to_px(210), mm_to_px(297)),
-        "A3": (mm_to_px(297), mm_to_px(420)),
-    }
-    page_width, page_height = page_dimensions[output_size]
-    # White canvas tall enough to hold every page at the chosen size.
-    canvas = Image.new("RGB", (page_width, page_height * page_count), "white")
-    for index, page in enumerate(pages):
-        # Scale the page to the target size and place it below the previous one.
-        canvas.paste(page.resize((page_width, page_height)), (0, index * page_height))
-    # Separator lines sit on each boundary between two consecutive pages.
-    pen = ImageDraw.Draw(canvas)
-    for boundary in range(1, page_count):
-        y = page_height * boundary
-        pen.line([(0, y), (page_width, y)], fill=line_color, width=2)
-    return canvas
-def convert_pdf_to_multiple_images(pdf_bytes, output_size="A4", dpi=96):
-    """
-    Render a PDF into a list of images, one per page, all resized to the same size.
-
-    Args:
-        pdf_bytes:   Raw bytes of the PDF document.
-        output_size: Target size for each page: "Original", "A4" or "A3".
-                     "Original" applies the pixel size of the first rendered
-                     page to every page.
-        dpi:         Pixel density used to convert the A4/A3 millimetre
-                     dimensions into pixels (default 96).
-
-    Returns:
-        A list of PIL Image objects in page order.
-    """
-    pages = convert_from_bytes(pdf_bytes)
-
-    def mm_to_px(mm):
-        # Same arithmetic order as (mm * dpi) / 25.4, truncated to whole pixels.
-        return int(mm * dpi / 25.4)
-
-    page_dimensions = {
-        "Original": pages[0].size,
-        "A4": (mm_to_px(210), mm_to_px(297)),
-        "A3": (mm_to_px(297), mm_to_px(420)),
-    }
-    target_width, target_height = page_dimensions[output_size]
-    # Every page is scaled to the same target size.
-    return [page.resize((target_width, target_height)) for page in pages]
-def pdf_to_jpg_or_zip(pdf_file, output_size, convert_mode):
-    """
-    Convert an uploaded PDF into one long JPG or a zip of per-page JPGs.
-
-    Args:
-        pdf_file:     PDF content as bytes (the Gradio File input is binary),
-                      or None when nothing was uploaded.
-        output_size:  "Original", "A4" or "A3".
-        convert_mode: "Single Image" for one stitched image; any other value
-                      is treated as "Multiple Images".
-
-    Returns:
-        A tuple (display_image_array, file_path):
-            * no upload:       (None, None)
-            * single image:    numpy array for the preview, and the path of
-                               the saved JPG (uuid-based file name)
-            * multiple images: None (no preview), and the path of the zip
-                               archive (uuid-based file name) containing
-                               page_1.jpg, page_2.jpg, ...
-    """
-    if pdf_file is None:
-        return None, None
-    # The uploaded file already arrives as raw bytes.
-    pdf_bytes = pdf_file
-    if convert_mode == "Single Image":
-        combined_image = convert_pdf_to_combined_image(pdf_bytes, output_size=output_size)
-        display_image_array = np.array(combined_image)
-        # A uuid-based name keeps simultaneous conversions from overwriting each other.
-        unique_filename = f"{uuid.uuid4()}.jpg"
-        combined_image.save(unique_filename, format="JPEG")
-        return display_image_array, unique_filename
-    # convert_mode == "Multiple Images"
-    images_list = convert_pdf_to_multiple_images(pdf_bytes, output_size=output_size)
-    zip_filename = f"{uuid.uuid4()}.zip"
-    with zipfile.ZipFile(zip_filename, 'w') as zipf:
-        for idx, img in enumerate(images_list, start=1):
-            # Encode each page in memory instead of going through a shared
-            # "page_N.jpg" file in the working directory: concurrent requests
-            # would overwrite or delete each other's pages, and a failure
-            # mid-loop would leave stray files behind.
-            buffer = io.BytesIO()
-            img.save(buffer, format="JPEG")
-            zipf.writestr(f"page_{idx}.jpg", buffer.getvalue())
-    # No preview in multi-image mode.
-    return None, zip_filename
-# ========== Gradio application UI ==========
-# Builds the page: title and instructions, the input controls, the Convert
-# button, and the preview/download outputs wired to pdf_to_jpg_or_zip.
-with gr.Blocks(
-    title="PDF to JPG Converter - High Quality PDF Merging",
-    css=".gradio-container {max-width: 800px; margin: 0 auto;}"
-) as demo:
-    gr.Markdown("<h1 style='text-align: center;'>PDF to JPG Online Converter</h1>")
-    gr.Markdown("""
-    <p style='text-align: center; font-size: 16px;'>
-    Welcome to our PDF to JPG Online Converter! This tool supports merging all PDF pages into one single long image
-    or converting each page into separate images for easy viewing and sharing.<br/>
-    Supports Original, A4, and A3 page sizes for stitching, fast conversion, high-quality output, and free to use.
-    </p>
-    """)
-    gr.Markdown("""
-    <hr/>
-    <h3>How to Use:</h3>
-    <ol>
-        <li>Upload your PDF file in the "Choose PDF File" section below.</li>
-        <li>Select your desired output size (Original, A4, or A3) from the dropdown menu.</li>
-        <li>Select whether you want "Multiple Images" or "Single Image".</li>
-        <li>Click the "Convert" button and wait a few seconds to preview the result (only if single image).</li>
-        <li>Click the "Download File" button to save the result to your local device.</li>
-    </ol>
-    <hr/>
-    """)
-    with gr.Row():
-        # pdf_input uses type="binary", so the handler receives the file content as bytes
-        pdf_input = gr.File(label="Choose PDF File", file_types=[".pdf"], type="binary")
-        size_dropdown = gr.Dropdown(
-            choices=["Original", "A4", "A3"],
-            value="A4",
-            label="Choose Output Size"
-        )
-        # The default selection is "Multiple Images"
-        conversion_mode = gr.Radio(
-            choices=["Multiple Images", "Single Image"],
-            value="Multiple Images",
-            label="Conversion Mode"
-        )
-    # Notice shown to the user about the limitations of Single Image mode
-    gr.Markdown("""
-    <p style='color: red; font-size: 14px;'>
-    Notice: If you choose <b>Single Image</b> mode but your PDF has many pages, you might encounter
-    maximum height limitations that can cause errors or improper display.
-    In that case, please consider choosing <b>Multiple Images</b> mode.
-    </p>
-    """)
-    convert_button = gr.Button("Convert")
-    with gr.Row():
-        image_output = gr.Image(label="Preview", type="numpy")
-        download_output = gr.File(label="Download File", type="filepath")
-    # Inputs:  pdf_input, size_dropdown, conversion_mode
-    # Outputs: image_output, download_output
-    convert_button.click(
-        fn=pdf_to_jpg_or_zip,
-        inputs=[pdf_input, size_dropdown, conversion_mode],
-        outputs=[image_output, download_output]
-    )
-# Launch the app only when this file is executed as a script.
-if __name__ == "__main__":
-    demo.launch(ssr_mode=True, show_api=False, show_error=True)