Spaces:

shriarul5273
/

PDF_Utils

Running

File size: 9,644 Bytes

import gradio as gr
from PyPDF2 import PdfReader, PdfWriter, PageObject
from PIL import Image
import tempfile
import os
import atexit
import zipfile
from pdf2docx import Converter

def merge_pdfs(pdf_files, order, start_on_odd=False):
    """Merge the selected PDFs into a single file and return its path.

    Args:
        pdf_files: Sequence of uploaded files; each item must expose the
            path of the PDF on disk as ``.name``.
        order: 1-based positions into ``pdf_files``, in output order. A
            value of 0 is ignored, which is how a file gets skipped.
        start_on_odd: If true, a blank A4 page is inserted before every
            PDF after the first whenever the pages written so far are odd
            in number.

    Returns:
        Path of the merged PDF (``combine.pdf`` in the system temp
        directory).

    Raises:
        IndexError: If ``order`` holds a value below 0 or above
            ``len(pdf_files)``.
    """
    # Validate up front: a negative position would otherwise wrap around
    # through Python's negative indexing and silently pick the wrong file.
    file_count = len(pdf_files)
    for position in order:
        if not 0 <= position <= file_count:
            raise IndexError(
                f"Order value {position} is out of range; expected 0 to {file_count}."
            )

    # 0 means "skip"; every other value is a 1-based position.
    selected_pdfs = [pdf_files[position - 1] for position in order if position != 0]

    # Blank filler pages use A4, expressed in PDF points (1/72 inch).
    blank_width = 595.276  # 8.27 inches
    blank_height = 841.890  # 11.69 inches

    pdf_writer = PdfWriter()
    for index, pdf in enumerate(selected_pdfs):
        pdf_reader = PdfReader(pdf.name)

        # Pad to an even page count so that this PDF starts on an odd page.
        if start_on_odd and index > 0 and len(pdf_writer.pages) % 2 != 0:
            pdf_writer.add_page(
                PageObject.create_blank_page(width=blank_width, height=blank_height)
            )

        for page in pdf_reader.pages:
            pdf_writer.add_page(page)

    # NOTE(review): the output name is fixed, so every call overwrites the
    # previous result.
    temp_file_path = os.path.join(tempfile.gettempdir(), "combine.pdf")
    with open(temp_file_path, 'wb') as temp_file:
        pdf_writer.write(temp_file)

    return temp_file_path

def pdf_to_images(pdf_file, image_format="JPEG"):
    """Render every page of a PDF to an image file and return the paths.

    Args:
        pdf_file: Uploaded file exposing the PDF's path on disk as ``.name``.
        image_format: Format name handed to pdf2image and to ``Image.save``.
            "JPEG" (any letter case) produces ``.jpg`` files; every other
            value is written with a ``.png`` extension.

    Returns:
        List of image paths named ``page_<n>.<ext>`` (numbered from 1),
        stored in a freshly created temporary directory.
    """
    # Imported here rather than at module level, as in the original code.
    from pdf2image import convert_from_path

    # Render straight from the file on disk instead of first reading the
    # whole PDF into memory.
    images = convert_from_path(pdf_file.name, fmt=image_format)

    # The extension depends only on the format, so work it out once.
    ext = "jpg" if image_format.upper() == "JPEG" else "png"

    temp_dir = tempfile.mkdtemp()
    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(temp_dir, f"page_{page_number}.{ext}")
        image.save(image_path, image_format)
        image_paths.append(image_path)

    return image_paths

def images_to_pdf(image_files):
    """Combine images into a single PDF and return its path.

    Args:
        image_files: Non-empty sequence of uploaded files, each exposing
            the image's path on disk as ``.name``. The images are written
            in the given order.

    Returns:
        Path of the generated PDF (``images_to_pdf.pdf`` in the system
        temp directory).

    Raises:
        ValueError: If ``image_files`` is empty or ``None``.
    """
    # Fail with a clear message instead of an IndexError/TypeError below.
    if not image_files:
        raise ValueError("At least one image is required to build a PDF.")

    pages = []
    for image in image_files:
        # convert() returns an independent RGB copy, so the source file can
        # be closed as soon as the copy exists.
        with Image.open(image.name) as source:
            pages.append(source.convert("RGB"))

    temp_file_path = os.path.join(tempfile.gettempdir(), "images_to_pdf.pdf")
    pages[0].save(temp_file_path, save_all=True, append_images=pages[1:])
    return temp_file_path

def images_to_zip(image_paths):
    """Pack the given image files into ``images.zip`` and return its path.

    The archive is written to the system temp directory, and every file is
    stored flat under its base name.
    """
    archive_path = os.path.join(tempfile.gettempdir(), "images.zip")
    archive = zipfile.ZipFile(archive_path, 'w')
    try:
        for source_path in image_paths:
            member_name = os.path.basename(source_path)
            archive.write(source_path, member_name)
    finally:
        # Always finalize the archive, mirroring what a ``with`` block does.
        archive.close()
    return archive_path

def pdf_to_docx(pdf_file):
    """Convert a PDF to a DOCX document and return the DOCX path.

    Args:
        pdf_file: Uploaded file exposing the PDF's path on disk as ``.name``.

    Returns:
        Path of the generated document (``converted.docx`` in the system
        temp directory).

    Raises:
        ValueError: If ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        raise ValueError("A PDF file is required for DOCX conversion.")

    temp_file_path = os.path.join(tempfile.gettempdir(), "converted.docx")
    converter = Converter(pdf_file.name)
    try:
        converter.convert(temp_file_path)
    finally:
        # Release the converter even when the conversion fails.
        converter.close()
    return temp_file_path

# Create Gradio interface
with gr.Blocks(theme="gstaff/xkcd") as demo:
    gr.Markdown("# PDF Merger and Converter")
    with gr.Tabs():
        with gr.TabItem("PDF Merger"):
            # Inputs: the PDFs to merge and an optional 1-based ordering.
            pdf_input = gr.File(label="Upload PDF Files to Merge", file_types=[".pdf"], file_count="multiple")
            order_input = gr.Textbox(label="Enter the order of PDFs as comma-separated numbers, skip the number if you want to skip the file", placeholder="1,2,3,... or 3,1,2")

            with gr.Row():
                merge_button = gr.Button("Merge PDFs (Normal)")
                merge_odd_button = gr.Button("Merge PDFs (Each PDF starts on odd page)")

            merged_result = gr.File(label="Download Merged PDF")

            def merge_and_preview(pdf_files, order, start_on_odd=False):
                """Validate the requested order, merge the PDFs and return the result path.

                Args:
                    pdf_files: Uploaded PDF files, or ``None`` when nothing was uploaded.
                    order: Comma-separated 1-based positions (0 skips a file).
                        An empty value keeps the upload order.
                    start_on_odd: Forwarded to ``merge_pdfs``.

                Returns:
                    Path of the merged PDF.

                Raises:
                    gr.Error: If no file was uploaded, the order is not a list of
                        integers, or a position is outside ``0..len(pdf_files)``.
                """
                # Without this guard an empty upload fails on len(None).
                if not pdf_files:
                    raise gr.Error("Please upload at least one PDF file to merge.")
                n = len(pdf_files)

                if not order:
                    # Default to natural order if order is empty
                    order = list(range(1, n + 1))
                else:
                    try:
                        # Convert the input string to a list of integers
                        order = [int(x.strip()) for x in order.split(',')]
                    except ValueError:
                        # gr.Error has to be raised (not returned) to reach the user.
                        raise gr.Error("Invalid order format. Ensure it is comma-separated numbers.") from None

                # Ensure the order does not reference non-existing files
                if any(i < 0 or i > n for i in order):
                    raise gr.Error(f"Order values must be between 0 and {n} (0 means to skip the file).")

                # Merge PDFs with the specified start_on_odd option
                return merge_pdfs(pdf_files, order, start_on_odd)

            # Both buttons share one handler and differ only in start_on_odd.
            merge_button.click(
                lambda *args: merge_and_preview(*args, False),
                inputs=[pdf_input, order_input],
                outputs=[merged_result]
            )

            merge_odd_button.click(
                lambda *args: merge_and_preview(*args, True),
                inputs=[pdf_input, order_input],
                outputs=[merged_result]
            )

        with gr.TabItem("PDF to Image Converter"):
            single_pdf_input = gr.File(label="Upload PDF File to Convert", file_types=[".pdf"], file_count="single")
            image_format_option = gr.Radio(label="Select Image Format", choices=["JPEG", "PNG"], value="JPEG")
            image_output = gr.Gallery(label="Converted Images", show_label=True)
            download_zip_button = gr.Button("Download All Images as ZIP")
            zip_result = gr.File(label="Download ZIP")

            def convert_pdf_to_images_with_format(pdf_file, image_format):
                """Render the uploaded PDF to page images for the gallery.

                Returns the list of image paths, or ``None`` when no file is
                selected.
                """
                # No file is selected (for example after the upload is
                # cleared): empty the gallery instead of failing on None.name.
                if pdf_file is None:
                    return None
                return pdf_to_images(pdf_file, image_format)

            def download_images_as_zip_with_format(pdf_file, image_format):
                """Render the uploaded PDF to page images and return a ZIP of them.

                Raises:
                    gr.Error: If no PDF has been uploaded.
                """
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                # The pages are rendered again here; nothing is shared with
                # the gallery preview.
                image_paths = pdf_to_images(pdf_file, image_format)
                return images_to_zip(image_paths)

            # Refresh the gallery whenever the uploaded file changes.
            single_pdf_input.change(
                convert_pdf_to_images_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[image_output]
            )

            download_zip_button.click(
                download_images_as_zip_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[zip_result]
            )

        with gr.TabItem("Image to PDF Converter"):
            image_input = gr.File(label="Upload Images to Convert to PDF", file_types=[".jpg", ".png"], file_count="multiple")
            order_option = gr.Radio(label="Select Order Type", choices=["Ordered", "Reverse", "Custom"], value="Ordered")
            custom_order_input = gr.Textbox(label="Enter custom order (comma-separated indices)", visible=False)
            image_gallery = gr.Gallery(label="Images Preview (Arrange Order)", show_label=True)
            pdf_result = gr.File(label="Download PDF")

            def update_custom_order_visibility(order_type):
                """Show the custom-order textbox only for the "Custom" order type."""
                return gr.update(visible=(order_type == "Custom"))

            def sort_images(order_type, custom_order, images):
                """Return the uploaded images arranged according to the chosen order.

                Args:
                    order_type: "Ordered" (upload order), "Reverse" or "Custom".
                    custom_order: Comma-separated 1-based positions; only read
                        when ``order_type`` is "Custom".
                    images: Uploaded image files, or ``None`` when nothing was
                        uploaded.

                Returns:
                    A list of the images in the requested order.

                Raises:
                    gr.Error: If no image was uploaded, or the custom order is
                        not a list of integers within ``1..len(images)``.
                """
                if not images:
                    raise gr.Error("Please upload at least one image.")

                if order_type == "Reverse":
                    return images[::-1]

                if order_type == "Custom":
                    try:
                        positions = [int(part.strip()) for part in (custom_order or "").split(',')]
                    except ValueError:
                        # gr.Error has to be raised (not returned) to reach the user.
                        raise gr.Error("Invalid custom order. Ensure all indices are valid and within range.") from None
                    # Reject 0 and negatives explicitly: after the 1-based
                    # shift they would wrap around and pick the wrong image.
                    if any(position < 1 or position > len(images) for position in positions):
                        raise gr.Error("Invalid custom order. Ensure all indices are valid and within range.")
                    return [images[position - 1] for position in positions]

                return images

            order_option.change(
                update_custom_order_visibility,
                inputs=[order_option],
                outputs=[custom_order_input]
            )

            gr.Button("Preview Sorted Images").click(
                sort_images,
                inputs=[order_option, custom_order_input, image_input],
                outputs=[image_gallery]
            )

            # Sort first, then write the sorted images into one PDF.
            gr.Button("Generate PDF").click(
                lambda order_type, custom_order, images: images_to_pdf(sort_images(order_type, custom_order, images)),
                inputs=[order_option, custom_order_input, image_input],
                outputs=[pdf_result]
            )

        with gr.TabItem("PDF to DOCX Converter"):
            gr.Markdown("Some PDF files may not be converted properly due to the complexity of the PDF file")
            pdf_to_docx_input = gr.File(label="Upload PDF File to Convert to DOCX", file_types=[".pdf"], file_count="single")
            convert_button = gr.Button("Convert to DOCX")
            docx_result = gr.File(label="Download DOCX")

            def convert_pdf_to_docx_with_button(pdf_file):
                """Convert the uploaded PDF to DOCX and return the DOCX path.

                Raises:
                    gr.Error: If no PDF has been uploaded.
                """
                # Clicking the button with no upload would otherwise fail on
                # None.name inside the converter helper.
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                return pdf_to_docx(pdf_file)

            convert_button.click(
                convert_pdf_to_docx_with_button,
                inputs=[pdf_to_docx_input],
                outputs=[docx_result]
            )

# Clean up temporary files
def cleanup_temp_files():
    """Delete the output files this app writes into the system temp directory.

    Only the fixed names produced by the merge and convert helpers in this
    file are removed, so files that other programs keep in the shared temp
    directory are left alone. Removal is best-effort: a file that is missing
    or cannot be deleted is skipped.
    """
    temp_dir = tempfile.gettempdir()
    for filename in ("combine.pdf", "images_to_pdf.pdf", "images.zip", "converted.docx"):
        try:
            os.remove(os.path.join(temp_dir, filename))
        except OSError:
            # Deliberately ignored: the file may never have been created,
            # and an exit hook should not fail over a leftover temp file.
            continue


# Register the hook before launching so it is already in place if launch()
# does not return normally.
atexit.register(cleanup_temp_files)

# Launch the Gradio app
demo.launch()