innoai committed on
Commit
f35d252
·
verified ·
1 Parent(s): fbadc72

Update pdf_to_jpg_gradio5_en.py

Browse files
Files changed (1) hide show
  1. pdf_to_jpg_gradio5_en.py +95 -29
pdf_to_jpg_gradio5_en.py CHANGED
@@ -1,10 +1,13 @@
1
  # pdf_to_jpg_gradio.py
 
2
  import gradio as gr
3
  import io
4
  import uuid
5
  import numpy as np
6
  from PIL import Image, ImageDraw
7
  from pdf2image import convert_from_bytes
 
 
8
 
9
  def convert_pdf_to_combined_image(pdf_bytes, output_size="A4", dpi=96, line_color=(200, 200, 200)):
10
  """
@@ -47,38 +50,84 @@ def convert_pdf_to_combined_image(pdf_bytes, output_size="A4", dpi=96, line_colo
47
 
48
  return combined_image
49
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- def pdf_to_jpg(pdf_file, output_size):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  """
53
- 将上传的 PDF 文件转换为长图,并返回 (用于显示的图像[numpy], 用于下载的唯一文件路径)
54
  参数:
55
- pdf_file: Gradio 上传的 PDF 文件对象
56
- output_size: 用户选择的输出尺寸(Original / A4 / A3
 
57
  返回:
58
  (display_image_array, file_path)
59
- display_image_array: numpy 数组,用于在 gr.Image(type="numpy") 中显示
60
- file_path: string,唯一文件路径,用于 gr.File(type="filepath") 下载
 
 
 
 
61
  """
62
  if pdf_file is None:
63
  return None, None
64
 
65
- # 读取 PDF 文件的字节内容
66
  pdf_bytes = pdf_file
67
-
68
- # 调用拼接函数
69
- combined_image = convert_pdf_to_combined_image(pdf_bytes, output_size=output_size)
70
-
71
- # 转为 numpy 数组,供 gr.Image(type="numpy") 显示
72
- display_image_array = np.array(combined_image)
73
-
74
- # 使用 uuid 生成唯一文件名
75
- unique_filename = f"{uuid.uuid4()}.jpg"
76
- combined_image.save(unique_filename, format="JPEG")
77
-
78
- return display_image_array, unique_filename
79
 
80
-
81
- # ========== Gradio 应用界面部分 (英文界面) ==========
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  with gr.Blocks(
83
  title="PDF to JPG Converter - High Quality PDF Merging",
84
  css=".gradio-container {max-width: 800px; margin: 0 auto;}"
@@ -87,8 +136,9 @@ with gr.Blocks(
87
 
88
  gr.Markdown("""
89
  <p style='text-align: center; font-size: 16px;'>
90
- Welcome to our PDF to JPG Online Converter! This tool supports merging all PDF pages into one single long image for easy viewing and sharing.<br/>
91
- <b>SEO Summary:</b> Supports Original, A4, and A3 page sizes for stitching, fast conversion, high-quality output, and free to use.
 
92
  </p>
93
  """)
94
 
@@ -98,8 +148,9 @@ with gr.Blocks(
98
  <ol>
99
  <li>Upload your PDF file in the "Choose PDF File" section below.</li>
100
  <li>Select your desired output size (Original, A4, or A3) from the dropdown menu.</li>
101
- <li>Click the "Convert" button and wait a few seconds to preview the result.</li>
102
- <li>Click the "Download File" button to save the JPG image to your local device.</li>
 
103
  </ol>
104
  <hr/>
105
  """)
@@ -112,18 +163,33 @@ with gr.Blocks(
112
  value="A4",
113
  label="Choose Output Size"
114
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  convert_button = gr.Button("Convert")
117
 
118
  with gr.Row():
119
- # type="numpy",便于直接显示 numpy 数组
120
  image_output = gr.Image(label="Preview", type="numpy")
121
- # type="filepath",返回本地文件路径,避免串名
122
  download_output = gr.File(label="Download File", type="filepath")
123
 
 
 
124
  convert_button.click(
125
- fn=pdf_to_jpg,
126
- inputs=[pdf_input, size_dropdown],
127
  outputs=[image_output, download_output]
128
  )
129
 
 
1
  # pdf_to_jpg_gradio.py
2
+
3
  import gradio as gr
4
  import io
5
  import uuid
6
  import numpy as np
7
  from PIL import Image, ImageDraw
8
  from pdf2image import convert_from_bytes
9
+ import zipfile
10
+ import os
11
 
12
  def convert_pdf_to_combined_image(pdf_bytes, output_size="A4", dpi=96, line_color=(200, 200, 200)):
13
  """
 
50
 
51
  return combined_image
52
 
53
def convert_pdf_to_multiple_images(pdf_bytes, output_size="A4", dpi=96):
    """
    Convert the bytes of a PDF file into separate images, one per page.

    Args:
        pdf_bytes: Raw bytes of the PDF file.
        output_size: Target page size: "Original", "A4" or "A3".
        dpi: Pixel density used to convert the A4/A3 millimetre dimensions
            into pixels (default 96). It is not passed to the rasterizer.

    Returns:
        A list of PIL Image objects, one per PDF page, in page order.
        The list is empty when the rasterizer yields no pages.

    Raises:
        KeyError: If ``output_size`` is not one of the supported sizes.
    """
    # Fixed paper sizes in pixels (width, height); None means "leave the page
    # at the size it was rendered at".
    sizes = {
        "Original": None,
        "A4": (int(210 * dpi / 25.4), int(297 * dpi / 25.4)),
        "A3": (int(297 * dpi / 25.4), int(420 * dpi / 25.4)),
    }
    # Validate the requested size before the (expensive) rasterization.
    target_size = sizes[output_size]

    images = convert_from_bytes(pdf_bytes)

    if target_size is None:
        # Each page is exported on its own, so keep its own dimensions rather
        # than forcing every page to the first page's size (which distorted
        # PDFs with mixed page sizes and failed on an empty page list).
        return list(images)

    # Resize every page to the selected paper size.
    return [img.resize(target_size) for img in images]
79
+
80
def pdf_to_jpg_or_zip(pdf_file, output_size, convert_mode):
    """
    Convert an uploaded PDF into one long JPG or a zip of per-page JPGs.

    Args:
        pdf_file: Uploaded PDF content; it is handed unchanged, as bytes, to
            the conversion helpers. ``None`` when nothing was uploaded.
        output_size: 'Original' / 'A4' / 'A3'.
        convert_mode: 'Single Image' for one stitched image; any other value
            is treated as 'Multiple Images'.

    Returns:
        A tuple ``(display_image_array, file_path)``:
        - Single image: a numpy array for the preview and the path of the
          saved JPG.
        - Multiple images: ``None`` (no preview) and the path of the zip
          archive containing ``page_1.jpg``, ``page_2.jpg``, ...
        - No upload: ``(None, None)``.
    """
    if pdf_file is None:
        return None, None

    pdf_bytes = pdf_file

    if convert_mode == "Single Image":
        # Reuse the existing stitching logic.
        combined_image = convert_pdf_to_combined_image(pdf_bytes, output_size=output_size)
        display_image_array = np.array(combined_image)

        # A uuid-based name keeps results of different requests apart.
        unique_filename = f"{uuid.uuid4()}.jpg"
        combined_image.save(unique_filename, format="JPEG")
        return display_image_array, unique_filename

    # convert_mode == "Multiple Images"
    images_list = convert_pdf_to_multiple_images(pdf_bytes, output_size=output_size)

    zip_filename = f"{uuid.uuid4()}.zip"
    with zipfile.ZipFile(zip_filename, "w") as zipf:
        for idx, img in enumerate(images_list, start=1):
            # Encode each page in memory instead of via a shared
            # "page_N.jpg" file in the working directory: concurrent requests
            # would otherwise overwrite or delete each other's pages, and a
            # failure mid-loop would leave stray files behind.
            buffer = io.BytesIO()
            img.save(buffer, "JPEG")
            zipf.writestr(f"page_{idx}.jpg", buffer.getvalue())

    # No preview is shown in multi-image mode.
    return None, zip_filename
129
+
130
+ # ========== Gradio 应用界面部分 ==========
131
  with gr.Blocks(
132
  title="PDF to JPG Converter - High Quality PDF Merging",
133
  css=".gradio-container {max-width: 800px; margin: 0 auto;}"
 
136
 
137
  gr.Markdown("""
138
  <p style='text-align: center; font-size: 16px;'>
139
+ Welcome to our PDF to JPG Online Converter! This tool supports merging all PDF pages into one single long image
140
+ or converting each page into separate images for easy viewing and sharing.<br/>
141
+ Supports Original, A4, and A3 page sizes for stitching, fast conversion, high-quality output, and free to use.
142
  </p>
143
  """)
144
 
 
148
  <ol>
149
  <li>Upload your PDF file in the "Choose PDF File" section below.</li>
150
  <li>Select your desired output size (Original, A4, or A3) from the dropdown menu.</li>
151
+ <li>Select whether you want "Multiple Images" or "Single Image".</li>
152
+ <li>Click the "Convert" button and wait a few seconds to preview the result (only if single image).</li>
153
+ <li>Click the "Download File" button to save the result to your local device.</li>
154
  </ol>
155
  <hr/>
156
  """)
 
163
  value="A4",
164
  label="Choose Output Size"
165
  )
166
+ # 默认选项为 Multiple Images
167
+ conversion_mode = gr.Radio(
168
+ choices=["Multiple Images", "Single Image"],
169
+ value="Multiple Images",
170
+ label="Conversion Mode"
171
+ )
172
+
173
+ # 在 Single Image 模式下增加英文提示说明
174
+ gr.Markdown("""
175
+ <p style='color: red; font-size: 14px;'>
176
+ Notice: If you choose <b>Single Image</b> mode but your PDF has many pages, you might encounter
177
+ maximum height limitations that can cause errors or improper display.
178
+ In that case, please consider choosing <b>Multiple Images</b> mode.
179
+ </p>
180
+ """)
181
 
182
  convert_button = gr.Button("Convert")
183
 
184
  with gr.Row():
 
185
  image_output = gr.Image(label="Preview", type="numpy")
 
186
  download_output = gr.File(label="Download File", type="filepath")
187
 
188
+ # 输入: pdf_input, size_dropdown, conversion_mode
189
+ # 输出: image_output, download_output
190
  convert_button.click(
191
+ fn=pdf_to_jpg_or_zip,
192
+ inputs=[pdf_input, size_dropdown, conversion_mode],
193
  outputs=[image_output, download_output]
194
  )
195