innoai committed on
Commit
28de931
·
verified ·
1 Parent(s): 3bc4b55

Delete pdf_to_jpg_gradio5_en.py

Browse files
Files changed (1) hide show
  1. pdf_to_jpg_gradio5_en.py +0 -197
pdf_to_jpg_gradio5_en.py DELETED
@@ -1,197 +0,0 @@
1
- # pdf_to_jpg_gradio.py
2
-
3
- import gradio as gr
4
- import io
5
- import uuid
6
- import numpy as np
7
- from PIL import Image, ImageDraw
8
- from pdf2image import convert_from_bytes
9
- import zipfile
10
- import os
11
-
12
def convert_pdf_to_combined_image(pdf_bytes, output_size="A4", dpi=96, line_color=(200, 200, 200)):
    """
    Render every page of a PDF and stack the pages vertically into one long image.

    Args:
        pdf_bytes: Raw bytes of the PDF document.
        output_size: Page size preset: "Original", "A4" or "A3". "Original"
            applies the pixel size of the first rendered page to every page.
        dpi: Pixel density used to convert the A4/A3 millimetre dimensions
            into pixels (default 96). It is not passed to the PDF renderer.
        line_color: RGB colour of the separator drawn between pages
            (default light grey, (200, 200, 200)).

    Returns:
        A PIL Image in RGB mode holding all pages from top to bottom.
    """
    pages = convert_from_bytes(pdf_bytes)
    page_count = len(pages)

    # Millimetres -> pixels: mm * dpi / 25.4 (25.4 mm per inch).
    presets = {
        "Original": pages[0].size,
        "A4": (int(210 * dpi / 25.4), int(297 * dpi / 25.4)),
        "A3": (int(297 * dpi / 25.4), int(420 * dpi / 25.4)),
    }
    page_width, page_height = presets[output_size]

    canvas = Image.new("RGB", (page_width, page_height * page_count), "white")

    # Each page is resized to the same slot size and pasted below the previous one.
    for index, page in enumerate(pages):
        scaled_page = page.resize((page_width, page_height))
        canvas.paste(scaled_page, (0, index * page_height))

    # Draw a separator on every boundary between two consecutive pages.
    pen = ImageDraw.Draw(canvas)
    for boundary in range(1, page_count):
        y = page_height * boundary
        pen.line([(0, y), (page_width, y)], fill=line_color, width=2)

    return canvas
52
-
53
def convert_pdf_to_multiple_images(pdf_bytes, output_size="A4", dpi=96):
    """
    Render a PDF into one resized image per page.

    Args:
        pdf_bytes: Raw bytes of the PDF document.
        output_size: Page size preset: "Original", "A4" or "A3". "Original"
            applies the pixel size of the first rendered page to every page.
        dpi: Pixel density used to convert the A4/A3 millimetre dimensions
            into pixels (default 96).

    Returns:
        A list of PIL Image objects, one per PDF page, in page order.
    """
    pages = convert_from_bytes(pdf_bytes)

    # Millimetres -> pixels: mm * dpi / 25.4 (25.4 mm per inch).
    presets = {
        "Original": pages[0].size,
        "A4": (int(210 * dpi / 25.4), int(297 * dpi / 25.4)),
        "A3": (int(297 * dpi / 25.4), int(420 * dpi / 25.4)),
    }
    target_width, target_height = presets[output_size]

    # Every page is resized to the same target size.
    return [page.resize((target_width, target_height)) for page in pages]
79
-
80
def pdf_to_jpg_or_zip(pdf_file, output_size, convert_mode):
    """
    Convert an uploaded PDF into one stitched JPG or a ZIP of per-page JPGs.

    Args:
        pdf_file: PDF content as delivered by the upload component (forwarded
            unchanged as the PDF byte data), or None when nothing was uploaded.
        output_size: 'Original' / 'A4' / 'A3'.
        convert_mode: 'Single Image' produces one long image; any other value
            is handled as 'Multiple Images'.

    Returns:
        A tuple (display_image_array, file_path):
            - no upload: (None, None)
            - single image: numpy array of the stitched image (for preview)
              and the uniquely named JPG file it was saved to
            - multiple images: None (no preview) and the uniquely named ZIP
              file containing page_1.jpg, page_2.jpg, ...
    """
    if pdf_file is None:
        return None, None

    pdf_bytes = pdf_file

    if convert_mode == "Single Image":
        combined_image = convert_pdf_to_combined_image(pdf_bytes, output_size=output_size)
        display_image_array = np.array(combined_image)

        # A uuid-based name keeps outputs of different requests apart.
        unique_filename = f"{uuid.uuid4()}.jpg"
        combined_image.save(unique_filename, format="JPEG")
        return display_image_array, unique_filename

    # convert_mode == "Multiple Images"
    images_list = convert_pdf_to_multiple_images(pdf_bytes, output_size=output_size)

    zip_filename = f"{uuid.uuid4()}.zip"
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for idx, img in enumerate(images_list, start=1):
            # Encode each page in memory instead of writing a shared
            # "page_N.jpg" into the working directory: fixed names collide
            # when two requests run at once, and a failed save would leave
            # stray files behind.
            buffer = io.BytesIO()
            img.save(buffer, "JPEG")
            zipf.writestr(f"page_{idx}.jpg", buffer.getvalue())

    # No preview in multi-image mode.
    return None, zip_filename
129
-
130
# ========== Gradio application UI ==========
# Builds the page: title and usage instructions, the three inputs (PDF file,
# output size, conversion mode), a warning about Single Image mode, the
# Convert button, and the preview/download outputs wired to pdf_to_jpg_or_zip.
with gr.Blocks(
    title="PDF to JPG Converter - High Quality PDF Merging",
    css=".gradio-container {max-width: 800px; margin: 0 auto;}"
) as demo:
    gr.Markdown("<h1 style='text-align: center;'>PDF to JPG Online Converter</h1>")

    gr.Markdown("""
    <p style='text-align: center; font-size: 16px;'>
    Welcome to our PDF to JPG Online Converter! This tool supports merging all PDF pages into one single long image
    or converting each page into separate images for easy viewing and sharing.<br/>
    Supports Original, A4, and A3 page sizes for stitching, fast conversion, high-quality output, and free to use.
    </p>
    """)

    gr.Markdown("""
    <hr/>
    <h3>How to Use:</h3>
    <ol>
    <li>Upload your PDF file in the "Choose PDF File" section below.</li>
    <li>Select your desired output size (Original, A4, or A3) from the dropdown menu.</li>
    <li>Select whether you want "Multiple Images" or "Single Image".</li>
    <li>Click the "Convert" button and wait a few seconds to preview the result (only if single image).</li>
    <li>Click the "Download File" button to save the result to your local device.</li>
    </ol>
    <hr/>
    """)

    with gr.Row():
        # pdf_input is configured with type="binary", so the handler receives
        # the uploaded file as a byte stream rather than a path.
        pdf_input = gr.File(label="Choose PDF File", file_types=[".pdf"], type="binary")
        size_dropdown = gr.Dropdown(
            choices=["Original", "A4", "A3"],
            value="A4",
            label="Choose Output Size"
        )
        # The default selection is Multiple Images.
        conversion_mode = gr.Radio(
            choices=["Multiple Images", "Single Image"],
            value="Multiple Images",
            label="Conversion Mode"
        )

    # Notice explaining the limitation of Single Image mode for long PDFs.
    gr.Markdown("""
    <p style='color: red; font-size: 14px;'>
    Notice: If you choose <b>Single Image</b> mode but your PDF has many pages, you might encounter
    maximum height limitations that can cause errors or improper display.
    In that case, please consider choosing <b>Multiple Images</b> mode.
    </p>
    """)

    convert_button = gr.Button("Convert")

    with gr.Row():
        image_output = gr.Image(label="Preview", type="numpy")
        download_output = gr.File(label="Download File", type="filepath")

    # Inputs:  pdf_input, size_dropdown, conversion_mode
    # Outputs: image_output, download_output
    convert_button.click(
        fn=pdf_to_jpg_or_zip,
        inputs=[pdf_input, size_dropdown, conversion_mode],
        outputs=[image_output, download_output]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(ssr_mode=True, show_api=False, show_error=True)