shriarul5273 committed on
Commit
cea9063
·
verified ·
1 Parent(s): 0afe241

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +21 -0
  2. README.md +13 -13
  3. app.py +193 -168
  4. requirements.txt +3 -3
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:22.04
2
+
3
+ RUN apt-get update && apt-get install -y python3 python3-pip && apt-get clean
4
+
5
+ RUN apt install poppler-utils -y
6
+
7
+ # Install the necessary packages
8
+ WORKDIR /app
9
+
10
+ COPY . /app
11
+
12
+ RUN python3 -m pip install --upgrade pip
13
+
14
+ EXPOSE 7860
15
+
16
+ RUN pip install -r requirements.txt
17
+
18
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
19
+
20
+
21
+ CMD ["python3", "app.py"]
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: PDF Merger
3
- emoji: 🌍
4
- colorFrom: green
5
- colorTo: red
6
- sdk: docker
7
- sdk_version: 5.11.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: PDF Merger
3
+ emoji: 🌍
4
+ colorFrom: green
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.1.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,168 +1,193 @@
1
- import gradio as gr
2
- from PyPDF2 import PdfReader, PdfWriter, PageObject
3
- from pdf2image import convert_from_path
4
- import img2pdf
5
- import tempfile
6
- import os
7
- import atexit
8
-
9
- def merge_pdfs(pdf_files, order, start_on_odd=False):
10
- pdf_writer = PdfWriter()
11
-
12
- # Sort the PDF files based on the specified order, skipping files marked with '0'
13
- sorted_pdfs = [pdf_files[i-1] for i in order if i != 0]
14
-
15
- # Define default page size (A4)
16
- default_width = 595.276 # 8.27 inches
17
- default_height = 841.890 # 11.69 inches
18
-
19
- # Read and add each PDF file to the writer in the specified order
20
- for i, pdf in enumerate(sorted_pdfs):
21
- pdf_reader = PdfReader(pdf.name)
22
-
23
- # If start_on_odd is True and it's not the first PDF and the current total page count is odd, add a blank page
24
- if start_on_odd and i > 0 and len(pdf_writer.pages) % 2 != 0:
25
- blank_page = PageObject.create_blank_page(width=default_width, height=default_height)
26
- pdf_writer.add_page(blank_page)
27
-
28
- for page in pdf_reader.pages:
29
- pdf_writer.add_page(page)
30
-
31
- # Create a named temporary file for the merged PDF
32
- temp_file_path = os.path.join(tempfile.gettempdir(), "combine.pdf")
33
- with open(temp_file_path, 'wb') as temp_file:
34
- pdf_writer.write(temp_file)
35
-
36
- return temp_file_path
37
-
38
- def pdf_to_images(pdf_file):
39
- # Convert PDF to images
40
- temp_dir = tempfile.mkdtemp()
41
- images = convert_from_path(pdf_file.name, output_folder=temp_dir, fmt='jpg')
42
- image_paths = []
43
-
44
- for i, image in enumerate(images):
45
- image_path = os.path.join(temp_dir, f"page_{i + 1}.jpg")
46
- image.save(image_path, "JPEG")
47
- image_paths.append(image_path)
48
-
49
- return image_paths
50
-
51
- def images_to_pdf(image_files):
52
- # Convert images to a single PDF
53
- temp_file_path = os.path.join(tempfile.gettempdir(), "images_to_pdf.pdf")
54
- with open(temp_file_path, "wb") as pdf_file:
55
- pdf_file.write(img2pdf.convert([image.name for image in image_files]))
56
- return temp_file_path
57
-
58
- # Create Gradio interface
59
- with gr.Blocks() as demo:
60
- gr.Markdown("# PDF Merger and Converter")
61
-
62
- with gr.Tabs():
63
- with gr.TabItem("PDF Merger"):
64
- pdf_input = gr.File(label="Upload PDF Files to Merge", file_types=[".pdf"], file_count="multiple")
65
- order_input = gr.Textbox(label="Enter the order of PDFs as comma-separated numbers, skip the number if you want to skip the file", placeholder="1,2,3,... or 3,1,2")
66
-
67
- with gr.Row():
68
- merge_button = gr.Button("Merge PDFs (Normal)")
69
- merge_odd_button = gr.Button("Merge PDFs (Each PDF starts on odd page)")
70
-
71
- merged_result = gr.File(label="Download Merged PDF")
72
-
73
- def merge_and_preview(pdf_files, order, start_on_odd=False):
74
- n = len(pdf_files)
75
-
76
- if not order:
77
- # Default to natural order if order is empty
78
- order = list(range(1, n + 1))
79
- else:
80
- try:
81
- # Convert the input string to a list of integers
82
- order = [int(x.strip()) for x in order.split(',')]
83
- except ValueError:
84
- return gr.Error("Invalid order format. Ensure it is comma-separated numbers.")
85
-
86
- # Ensure the order does not reference non-existing files
87
- if any(i < 0 or i > n for i in order):
88
- return gr.Error(f"Order values must be between 0 and {n} (0 means to skip the file).")
89
-
90
- # Merge PDFs with the specified start_on_odd option
91
- merged_pdf_path = merge_pdfs(pdf_files, order, start_on_odd)
92
- return merged_pdf_path
93
-
94
- merge_button.click(
95
- lambda *args: merge_and_preview(*args, False),
96
- inputs=[pdf_input, order_input],
97
- outputs=[merged_result]
98
- )
99
-
100
- merge_odd_button.click(
101
- lambda *args: merge_and_preview(*args, True),
102
- inputs=[pdf_input, order_input],
103
- outputs=[merged_result]
104
- )
105
-
106
- with gr.TabItem("PDF to Image Converter"):
107
- single_pdf_input = gr.File(label="Upload PDF File to Convert", file_types=[".pdf"], file_count="single")
108
- image_output = gr.Gallery(label="Converted Images (JPG)", show_label=True)
109
-
110
- def convert_pdf_to_images(pdf_file):
111
- return pdf_to_images(pdf_file)
112
-
113
- single_pdf_input.change(
114
- convert_pdf_to_images,
115
- inputs=[single_pdf_input],
116
- outputs=[image_output]
117
- )
118
-
119
- with gr.TabItem("Image to PDF Converter"):
120
- image_input = gr.File(label="Upload Images to Convert to PDF", file_types=[".jpg", ".png"], file_count="multiple")
121
- order_option = gr.Radio(label="Select Order Type", choices=["Ordered", "Reverse", "Custom"], value="Ordered")
122
- custom_order_input = gr.Textbox(label="Enter custom order (comma-separated indices)", visible=False)
123
- image_gallery = gr.Gallery(label="Images Preview (Arrange Order)", show_label=True)
124
- pdf_result = gr.File(label="Download PDF")
125
-
126
- def update_custom_order_visibility(order_type):
127
- return gr.update(visible=(order_type == "Custom"))
128
-
129
- def sort_images(order_type, custom_order, images):
130
- if order_type == "Reverse":
131
- return images[::-1]
132
- elif order_type == "Custom":
133
- try:
134
- indices = [int(i.strip()) - 1 for i in custom_order.split(',')]
135
- return [images[i] for i in indices]
136
- except (ValueError, IndexError):
137
- return gr.Error("Invalid custom order. Ensure all indices are valid and within range.")
138
- return images
139
-
140
- order_option.change(
141
- update_custom_order_visibility,
142
- inputs=[order_option],
143
- outputs=[custom_order_input]
144
- )
145
-
146
- gr.Button("Preview Sorted Images").click(
147
- lambda order_type, custom_order, images: sort_images(order_type, custom_order, images),
148
- inputs=[order_option, custom_order_input, image_input],
149
- outputs=[image_gallery]
150
- )
151
-
152
- gr.Button("Generate PDF").click(
153
- lambda order_type, custom_order, images: images_to_pdf(sort_images(order_type, custom_order, images)),
154
- inputs=[order_option, custom_order_input, image_input],
155
- outputs=[pdf_result]
156
- )
157
-
158
- # Launch the Gradio app
159
- demo.launch()
160
-
161
- # Clean up temporary files
162
- def cleanup_temp_files():
163
- temp_dir = tempfile.gettempdir()
164
- for filename in os.listdir(temp_dir):
165
- if filename.endswith('.pdf') or filename.endswith('.jpg'):
166
- os.remove(os.path.join(temp_dir, filename))
167
-
168
- atexit.register(cleanup_temp_files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PyPDF2 import PdfReader, PdfWriter, PageObject
3
+ from PIL import Image
4
+ import tempfile
5
+ import os
6
+ import atexit
7
+ import zipfile
8
+
9
+ def merge_pdfs(pdf_files, order, start_on_odd=False):
10
+ pdf_writer = PdfWriter()
11
+
12
+ # Sort the PDF files based on the specified order, skipping files marked with '0'
13
+ sorted_pdfs = [pdf_files[i-1] for i in order if i != 0]
14
+
15
+ # Define default page size (A4)
16
+ default_width = 595.276 # 8.27 inches
17
+ default_height = 841.890 # 11.69 inches
18
+
19
+ # Read and add each PDF file to the writer in the specified order
20
+ for i, pdf in enumerate(sorted_pdfs):
21
+ pdf_reader = PdfReader(pdf.name)
22
+
23
+ # If start_on_odd is True and it's not the first PDF and the current total page count is odd, add a blank page
24
+ if start_on_odd and i > 0 and len(pdf_writer.pages) % 2 != 0:
25
+ blank_page = PageObject.create_blank_page(width=default_width, height=default_height)
26
+ pdf_writer.add_page(blank_page)
27
+
28
+ for page in pdf_reader.pages:
29
+ pdf_writer.add_page(page)
30
+
31
+ # Create a named temporary file for the merged PDF
32
+ temp_file_path = os.path.join(tempfile.gettempdir(), "combine.pdf")
33
+ with open(temp_file_path, 'wb') as temp_file:
34
+ pdf_writer.write(temp_file)
35
+
36
+ return temp_file_path
37
+
38
+ def pdf_to_images(pdf_file, image_format="JPEG"):
39
+ # Convert PDF to images using PIL
40
+ from pdf2image import convert_from_bytes
41
+ with open(pdf_file.name, "rb") as f:
42
+ pdf_bytes = f.read()
43
+ images = convert_from_bytes(pdf_bytes, fmt=image_format)
44
+
45
+ temp_dir = tempfile.mkdtemp()
46
+ image_paths = []
47
+
48
+ for i, image in enumerate(images):
49
+ ext = "jpg" if image_format == "JPEG" else "png"
50
+ image_path = os.path.join(temp_dir, f"page_{i + 1}.{ext}")
51
+ image.save(image_path, image_format)
52
+ image_paths.append(image_path)
53
+
54
+ return image_paths
55
+
56
+ def images_to_pdf(image_files):
57
+ # Convert images to a single PDF
58
+ temp_file_path = os.path.join(tempfile.gettempdir(), "images_to_pdf.pdf")
59
+ image_list = [Image.open(image.name).convert("RGB") for image in image_files]
60
+ image_list[0].save(temp_file_path, save_all=True, append_images=image_list[1:])
61
+ return temp_file_path
62
+
63
+ def images_to_zip(image_paths):
64
+ # Create a zip file containing all images
65
+ zip_file_path = os.path.join(tempfile.gettempdir(), "images.zip")
66
+ with zipfile.ZipFile(zip_file_path, 'w') as zipf:
67
+ for image_path in image_paths:
68
+ zipf.write(image_path, os.path.basename(image_path))
69
+ return zip_file_path
70
+
71
+ # Create Gradio interface
72
+ with gr.Blocks(theme="gstaff/xkcd") as demo:
73
+ gr.Markdown("# PDF Merger and Converter")
74
+ with gr.Tabs():
75
+ with gr.TabItem("PDF Merger"):
76
+ pdf_input = gr.File(label="Upload PDF Files to Merge", file_types=[".pdf"], file_count="multiple")
77
+ order_input = gr.Textbox(label="Enter the order of PDFs as comma-separated numbers, skip the number if you want to skip the file", placeholder="1,2,3,... or 3,1,2")
78
+
79
+ with gr.Row():
80
+ merge_button = gr.Button("Merge PDFs (Normal)")
81
+ merge_odd_button = gr.Button("Merge PDFs (Each PDF starts on odd page)")
82
+
83
+ merged_result = gr.File(label="Download Merged PDF")
84
+
85
+ def merge_and_preview(pdf_files, order, start_on_odd=False):
86
+ n = len(pdf_files)
87
+
88
+ if not order:
89
+ # Default to natural order if order is empty
90
+ order = list(range(1, n + 1))
91
+ else:
92
+ try:
93
+ # Convert the input string to a list of integers
94
+ order = [int(x.strip()) for x in order.split(',')]
95
+ except ValueError:
96
+ return gr.Error("Invalid order format. Ensure it is comma-separated numbers.")
97
+
98
+ # Ensure the order does not reference non-existing files
99
+ if any(i < 0 or i > n for i in order):
100
+ return gr.Error(f"Order values must be between 0 and {n} (0 means to skip the file).")
101
+
102
+ # Merge PDFs with the specified start_on_odd option
103
+ merged_pdf_path = merge_pdfs(pdf_files, order, start_on_odd)
104
+ return merged_pdf_path
105
+
106
+ merge_button.click(
107
+ lambda *args: merge_and_preview(*args, False),
108
+ inputs=[pdf_input, order_input],
109
+ outputs=[merged_result]
110
+ )
111
+
112
+ merge_odd_button.click(
113
+ lambda *args: merge_and_preview(*args, True),
114
+ inputs=[pdf_input, order_input],
115
+ outputs=[merged_result]
116
+ )
117
+
118
+ with gr.TabItem("PDF to Image Converter"):
119
+ single_pdf_input = gr.File(label="Upload PDF File to Convert", file_types=[".pdf"], file_count="single")
120
+ image_format_option = gr.Radio(label="Select Image Format", choices=["JPEG", "PNG"], value="JPEG")
121
+ image_output = gr.Gallery(label="Converted Images", show_label=True)
122
+ download_zip_button = gr.Button("Download All Images as ZIP")
123
+ zip_result = gr.File(label="Download ZIP")
124
+
125
+ def convert_pdf_to_images_with_format(pdf_file, image_format):
126
+ return pdf_to_images(pdf_file, image_format)
127
+
128
+ def download_images_as_zip_with_format(pdf_file, image_format):
129
+ image_paths = pdf_to_images(pdf_file, image_format)
130
+ return images_to_zip(image_paths)
131
+
132
+ single_pdf_input.change(
133
+ convert_pdf_to_images_with_format,
134
+ inputs=[single_pdf_input, image_format_option],
135
+ outputs=[image_output]
136
+ )
137
+
138
+ download_zip_button.click(
139
+ download_images_as_zip_with_format,
140
+ inputs=[single_pdf_input, image_format_option],
141
+ outputs=[zip_result]
142
+ )
143
+
144
+ with gr.TabItem("Image to PDF Converter"):
145
+ image_input = gr.File(label="Upload Images to Convert to PDF", file_types=[".jpg", ".png"], file_count="multiple")
146
+ order_option = gr.Radio(label="Select Order Type", choices=["Ordered", "Reverse", "Custom"], value="Ordered")
147
+ custom_order_input = gr.Textbox(label="Enter custom order (comma-separated indices)", visible=False)
148
+ image_gallery = gr.Gallery(label="Images Preview (Arrange Order)", show_label=True)
149
+ pdf_result = gr.File(label="Download PDF")
150
+
151
+ def update_custom_order_visibility(order_type):
152
+ return gr.update(visible=(order_type == "Custom"))
153
+
154
+ def sort_images(order_type, custom_order, images):
155
+ if order_type == "Reverse":
156
+ return images[::-1]
157
+ elif order_type == "Custom":
158
+ try:
159
+ indices = [int(i.strip()) - 1 for i in custom_order.split(',')]
160
+ return [images[i] for i in indices]
161
+ except (ValueError, IndexError):
162
+ return gr.Error("Invalid custom order. Ensure all indices are valid and within range.")
163
+ return images
164
+
165
+ order_option.change(
166
+ update_custom_order_visibility,
167
+ inputs=[order_option],
168
+ outputs=[custom_order_input]
169
+ )
170
+
171
+ gr.Button("Preview Sorted Images").click(
172
+ lambda order_type, custom_order, images: sort_images(order_type, custom_order, images),
173
+ inputs=[order_option, custom_order_input, image_input],
174
+ outputs=[image_gallery]
175
+ )
176
+
177
+ gr.Button("Generate PDF").click(
178
+ lambda order_type, custom_order, images: images_to_pdf(sort_images(order_type, custom_order, images)),
179
+ inputs=[order_option, custom_order_input, image_input],
180
+ outputs=[pdf_result]
181
+ )
182
+
183
+ # Launch the Gradio app
184
+ demo.launch()
185
+
186
+ # Clean up temporary files
187
+ def cleanup_temp_files():
188
+ temp_dir = tempfile.gettempdir()
189
+ for filename in os.listdir(temp_dir):
190
+ if filename.endswith('.pdf') or filename.endswith('.jpg') or filename.endswith('.png'):
191
+ os.remove(os.path.join(temp_dir, filename))
192
+
193
+ atexit.register(cleanup_temp_files)
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio
2
- PyPDF2
3
- img2pdf
4
  pdf2image
 
1
+ gradio
2
+ PyPDF2
3
+ img2pdf
4
  pdf2image