# Spaces: Running
import gradio as gr | |
from PyPDF2 import PdfReader, PdfWriter, PageObject | |
from PIL import Image | |
import tempfile | |
import os | |
import atexit | |
import zipfile | |
from pdf2docx import Converter | |
def merge_pdfs(pdf_files, order, start_on_odd=False):
    """Merge the selected PDF files into one PDF and return its path.

    Args:
        pdf_files: Uploaded PDFs. Each item is either a path string or an
            object whose ``name`` attribute holds the path on disk.
        order: 1-based positions into ``pdf_files`` giving the output
            sequence. Entries equal to ``0`` are skipped.
        start_on_odd: When true, a blank A4 page is inserted before every
            PDF after the first whenever the number of pages written so far
            is odd, so that the next PDF begins on an odd page.

    Returns:
        Path of the merged file (``combine.pdf`` in the system temp directory).

    Raises:
        IndexError: If an ``order`` entry is negative or larger than
            ``len(pdf_files)``.
    """
    order = list(order)
    file_count = len(pdf_files)

    # Validate before touching any file: a negative entry would otherwise be
    # accepted by Python's negative indexing and silently pick the wrong PDF.
    for position in order:
        if position < 0 or position > file_count:
            raise IndexError(
                f"order value {position} is out of range; expected 0..{file_count}"
            )

    # Arrange the PDFs in the requested order, skipping entries marked with 0
    sorted_pdfs = [pdf_files[i - 1] for i in order if i != 0]

    # Default page size (A4, in points) used for inserted blank pages
    default_width = 595.276  # 8.27 inches
    default_height = 841.890  # 11.69 inches

    pdf_writer = PdfWriter()
    for i, pdf in enumerate(sorted_pdfs):
        # Accept both plain paths and upload objects exposing ``.name``
        pdf_reader = PdfReader(getattr(pdf, "name", pdf))

        # Pad with a blank page so this PDF starts on an odd page
        if start_on_odd and i > 0 and len(pdf_writer.pages) % 2 != 0:
            blank_page = PageObject.create_blank_page(width=default_width, height=default_height)
            pdf_writer.add_page(blank_page)

        for page in pdf_reader.pages:
            pdf_writer.add_page(page)

    # Write the merged document to a named file in the temp directory
    temp_file_path = os.path.join(tempfile.gettempdir(), "combine.pdf")
    with open(temp_file_path, 'wb') as temp_file:
        pdf_writer.write(temp_file)
    return temp_file_path
def pdf_to_images(pdf_file, image_format="JPEG"):
    """Render every page of a PDF to an image file and return the paths.

    Args:
        pdf_file: Upload object whose ``name`` attribute is the PDF path.
        image_format: Format name passed to the renderer and to the image
            ``save`` call. ``"JPEG"`` yields ``.jpg`` files; any other value
            yields ``.png`` file names.

    Returns:
        List of paths named ``page_<n>.<ext>`` (numbered from 1) inside a
        freshly created temporary directory.
    """
    # Imported here rather than at module level, as in the original code
    from pdf2image import convert_from_bytes

    with open(pdf_file.name, "rb") as source:
        raw_pdf = source.read()
    rendered_pages = convert_from_bytes(raw_pdf, fmt=image_format)

    output_dir = tempfile.mkdtemp()
    suffix = "jpg" if image_format == "JPEG" else "png"

    saved_paths = []
    for page_number, page_image in enumerate(rendered_pages, start=1):
        target = os.path.join(output_dir, f"page_{page_number}.{suffix}")
        page_image.save(target, image_format)
        saved_paths.append(target)
    return saved_paths
def images_to_pdf(image_files):
    """Combine the given images into one multi-page PDF and return its path.

    Args:
        image_files: Non-empty sequence of images, in page order. Each item
            is either a path string or an object whose ``name`` attribute
            holds the path on disk.

    Returns:
        Path of the generated file (``images_to_pdf.pdf`` in the system temp
        directory).

    Raises:
        ValueError: If ``image_files`` is empty or ``None``.
    """
    # Fail with a clear message instead of an IndexError on the first page
    if not image_files:
        raise ValueError("images_to_pdf requires at least one image")

    temp_file_path = os.path.join(tempfile.gettempdir(), "images_to_pdf.pdf")

    pages = []
    for image in image_files:
        # convert() returns an independent RGB copy, so the source file
        # handle can be closed as soon as the copy exists.
        with Image.open(getattr(image, "name", image)) as opened:
            pages.append(opened.convert("RGB"))

    first_page, *remaining_pages = pages
    first_page.save(temp_file_path, save_all=True, append_images=remaining_pages)
    return temp_file_path
def images_to_zip(image_paths):
    """Pack the given image files into a ZIP archive and return its path.

    Args:
        image_paths: Iterable of paths to the files to archive.

    Returns:
        Path of the archive (``images.zip`` in the system temp directory).
        Each file is stored under its base name only, without directories.
    """
    archive_path = os.path.join(tempfile.gettempdir(), "images.zip")
    with zipfile.ZipFile(archive_path, 'w') as archive:
        for source_path in image_paths:
            member_name = os.path.basename(source_path)
            archive.write(source_path, member_name)
    return archive_path
def pdf_to_docx(pdf_file):
    """Convert a PDF to a DOCX document and return the DOCX path.

    Args:
        pdf_file: Either a path string or an object whose ``name`` attribute
            holds the path of the PDF on disk.

    Returns:
        Path of the converted file (``converted.docx`` in the system temp
        directory).
    """
    temp_file_path = os.path.join(tempfile.gettempdir(), "converted.docx")
    converter = Converter(getattr(pdf_file, "name", pdf_file))
    try:
        converter.convert(temp_file_path)
    finally:
        # Always release the converter, even when conversion fails
        converter.close()
    return temp_file_path
# Create Gradio interface
#
# Validation failures are reported with ``raise gr.Error(...)``: the error has
# to be raised, not returned, otherwise the Error object is handed to the
# output component as if it were a result.
with gr.Blocks(theme="gstaff/xkcd") as demo:
    gr.Markdown("# PDF Merger and Converter")

    with gr.Tabs():
        with gr.TabItem("PDF Merger"):
            pdf_input = gr.File(label="Upload PDF Files to Merge", file_types=[".pdf"], file_count="multiple")
            order_input = gr.Textbox(label="Enter the order of PDFs as comma-separated numbers, skip the number if you want to skip the file", placeholder="1,2,3,... or 3,1,2")
            with gr.Row():
                merge_button = gr.Button("Merge PDFs (Normal)")
                merge_odd_button = gr.Button("Merge PDFs (Each PDF starts on odd page)")
            merged_result = gr.File(label="Download Merged PDF")

            def merge_and_preview(pdf_files, order, start_on_odd=False):
                """Parse and validate the order text, then merge the uploaded PDFs.

                ``order`` is the raw textbox content; an empty value means
                "use the upload order". Returns the merged PDF path.
                """
                if not pdf_files:
                    raise gr.Error("Please upload at least one PDF file.")
                n = len(pdf_files)

                if not order or not order.strip():
                    # Default to natural order if order is empty
                    order = list(range(1, n + 1))
                else:
                    try:
                        # Convert the input string to a list of integers
                        order = [int(x.strip()) for x in order.split(',')]
                    except ValueError:
                        raise gr.Error("Invalid order format. Ensure it is comma-separated numbers.") from None

                # Ensure the order does not reference non-existing files
                if any(i < 0 or i > n for i in order):
                    raise gr.Error(f"Order values must be between 0 and {n} (0 means to skip the file).")

                # Merge PDFs with the specified start_on_odd option
                return merge_pdfs(pdf_files, order, start_on_odd)

            merge_button.click(
                lambda *args: merge_and_preview(*args, False),
                inputs=[pdf_input, order_input],
                outputs=[merged_result]
            )
            merge_odd_button.click(
                lambda *args: merge_and_preview(*args, True),
                inputs=[pdf_input, order_input],
                outputs=[merged_result]
            )

        with gr.TabItem("PDF to Image Converter"):
            single_pdf_input = gr.File(label="Upload PDF File to Convert", file_types=[".pdf"], file_count="single")
            image_format_option = gr.Radio(label="Select Image Format", choices=["JPEG", "PNG"], value="JPEG")
            image_output = gr.Gallery(label="Converted Images", show_label=True)
            download_zip_button = gr.Button("Download All Images as ZIP")
            zip_result = gr.File(label="Download ZIP")

            def convert_pdf_to_images_with_format(pdf_file, image_format):
                """Render the uploaded PDF into gallery images."""
                # The change event also fires when the upload is cleared;
                # in that case there is nothing to render.
                if pdf_file is None:
                    return None
                return pdf_to_images(pdf_file, image_format)

            def download_images_as_zip_with_format(pdf_file, image_format):
                """Render the uploaded PDF and return a ZIP of the page images."""
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                image_paths = pdf_to_images(pdf_file, image_format)
                return images_to_zip(image_paths)

            single_pdf_input.change(
                convert_pdf_to_images_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[image_output]
            )
            download_zip_button.click(
                download_images_as_zip_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[zip_result]
            )

        with gr.TabItem("Image to PDF Converter"):
            image_input = gr.File(label="Upload Images to Convert to PDF", file_types=[".jpg", ".png"], file_count="multiple")
            order_option = gr.Radio(label="Select Order Type", choices=["Ordered", "Reverse", "Custom"], value="Ordered")
            custom_order_input = gr.Textbox(label="Enter custom order (comma-separated indices)", visible=False)
            image_gallery = gr.Gallery(label="Images Preview (Arrange Order)", show_label=True)
            pdf_result = gr.File(label="Download PDF")

            def update_custom_order_visibility(order_type):
                """Show the custom-order textbox only for the "Custom" order type."""
                return gr.update(visible=(order_type == "Custom"))

            def sort_images(order_type, custom_order, images):
                """Return ``images`` arranged according to the selected order type.

                "Reverse" reverses the upload order, "Custom" applies the
                1-based comma-separated indices in ``custom_order``, and any
                other value keeps the upload order.
                """
                if not images:
                    raise gr.Error("Please upload at least one image.")
                if order_type == "Reverse":
                    return images[::-1]
                if order_type == "Custom":
                    try:
                        indices = [int(i.strip()) - 1 for i in custom_order.split(',')]
                    except (ValueError, AttributeError):
                        raise gr.Error("Invalid custom order. Ensure all indices are valid and within range.") from None
                    # Reject 0 and negatives explicitly: after the -1 shift
                    # they would wrap around to the end of the list.
                    if any(i < 0 or i >= len(images) for i in indices):
                        raise gr.Error("Invalid custom order. Ensure all indices are valid and within range.")
                    return [images[i] for i in indices]
                return images

            def preview_sorted_images(order_type, custom_order, images):
                """Return the sorted images as file paths for the preview gallery."""
                # Hand the gallery plain paths; uploads may arrive as objects
                # carrying the path in ``.name`` or as path strings.
                return [getattr(image, "name", image) for image in sort_images(order_type, custom_order, images)]

            def generate_pdf(order_type, custom_order, images):
                """Sort the uploaded images and convert them into one PDF."""
                return images_to_pdf(sort_images(order_type, custom_order, images))

            order_option.change(
                update_custom_order_visibility,
                inputs=[order_option],
                outputs=[custom_order_input]
            )
            gr.Button("Preview Sorted Images").click(
                preview_sorted_images,
                inputs=[order_option, custom_order_input, image_input],
                outputs=[image_gallery]
            )
            gr.Button("Generate PDF").click(
                generate_pdf,
                inputs=[order_option, custom_order_input, image_input],
                outputs=[pdf_result]
            )

        with gr.TabItem("PDF to DOCX Converter"):
            gr.Markdown("Some PDF files may not be converted properly due to the complexity of the PDF file")
            pdf_to_docx_input = gr.File(label="Upload PDF File to Convert to DOCX", file_types=[".pdf"], file_count="single")
            convert_button = gr.Button("Convert to DOCX")
            docx_result = gr.File(label="Download DOCX")

            def convert_pdf_to_docx_with_button(pdf_file):
                """Convert the uploaded PDF to DOCX and return the DOCX path."""
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                return pdf_to_docx(pdf_file)

            convert_button.click(
                convert_pdf_to_docx_with_button,
                inputs=[pdf_to_docx_input],
                outputs=[docx_result]
            )
# Clean up temporary files
def cleanup_temp_files():
    """Remove the fixed-name output files this app writes to the temp directory.

    Only the files created by the converters in this module are deleted
    (``combine.pdf``, ``images_to_pdf.pdf``, ``images.zip`` and
    ``converted.docx``). The system temp directory is shared, so other
    files with the same extensions are left alone. Per-conversion image
    directories created with ``tempfile.mkdtemp()`` are not tracked and are
    not removed here.
    """
    temp_dir = tempfile.gettempdir()
    app_outputs = ("combine.pdf", "images_to_pdf.pdf", "images.zip", "converted.docx")
    for filename in app_outputs:
        try:
            os.remove(os.path.join(temp_dir, filename))
        except OSError:
            # Best-effort exit hook: the file may never have been created or
            # may not be removable; keep going with the remaining files.
            pass


# Register the hook before launching: launch() blocks while the server runs,
# and registering first guarantees the hook is in place even if it raises.
atexit.register(cleanup_temp_files)

# Launch the Gradio app
demo.launch()