"""Gradio app that merges PDFs and converts between PDF, images and DOCX."""

import atexit
import os
import shutil
import tempfile
import zipfile

import gradio as gr
from pdf2docx import Converter
from PIL import Image
from PyPDF2 import PageObject, PdfReader, PdfWriter

# Every file this app generates is written below this private directory, so
# the exit-time cleanup can remove exactly what the app created and nothing
# that belongs to other programs sharing the system temp directory.
WORK_DIR = tempfile.mkdtemp(prefix="pdf_tools_")

# Default page size (A4) in PDF points, used for inserted blank pages.
A4_WIDTH = 595.276  # 8.27 inches
A4_HEIGHT = 841.890  # 11.69 inches


def _file_path(uploaded):
    """Return the filesystem path of an uploaded file.

    Accepts an object exposing the path as ``.name`` (what the handlers in
    this file have always relied on) or a plain path string.
    """
    return getattr(uploaded, "name", uploaded)


def _new_output_path(filename):
    """Return a path for ``filename`` inside a fresh subdirectory of WORK_DIR.

    A unique directory per result keeps the download name stable while
    preventing concurrent requests from overwriting each other's output.
    """
    return os.path.join(tempfile.mkdtemp(dir=WORK_DIR), filename)


def merge_pdfs(pdf_files, order, start_on_odd=False):
    """Merge uploaded PDFs into one file and return its path.

    Args:
        pdf_files: Uploaded PDF files.
        order: 1-based positions into ``pdf_files`` giving the merge order;
            entries equal to 0 are skipped.
        start_on_odd: When True, a blank A4 page is inserted before every
            PDF after the first whenever the running page count is odd.

    Returns:
        Path of the merged PDF (named ``combine.pdf``).
    """
    pdf_writer = PdfWriter()

    # Pick the files in the requested order, dropping the '0' (skip) entries.
    selected = [pdf_files[i - 1] for i in order if i != 0]

    for index, pdf in enumerate(selected):
        pdf_reader = PdfReader(_file_path(pdf))

        # Pad with a blank page when the pages written so far are odd in number.
        if start_on_odd and index > 0 and len(pdf_writer.pages) % 2 != 0:
            blank_page = PageObject.create_blank_page(width=A4_WIDTH, height=A4_HEIGHT)
            pdf_writer.add_page(blank_page)

        for page in pdf_reader.pages:
            pdf_writer.add_page(page)

    output_path = _new_output_path("combine.pdf")
    with open(output_path, "wb") as output_file:
        pdf_writer.write(output_file)

    return output_path


def pdf_to_images(pdf_file, image_format="JPEG"):
    """Render every page of a PDF to an image file.

    Args:
        pdf_file: Uploaded PDF file.
        image_format: "JPEG" produces ``.jpg`` files; any other value
            produces files with a ``.png`` extension.

    Returns:
        List of image paths named ``page_1``, ``page_2``, ... in page order.
    """
    # Kept as a function-local import, as in the original code.
    from pdf2image import convert_from_bytes

    with open(_file_path(pdf_file), "rb") as f:
        pdf_bytes = f.read()

    images = convert_from_bytes(pdf_bytes, fmt=image_format)

    output_dir = tempfile.mkdtemp(dir=WORK_DIR)
    ext = "jpg" if image_format == "JPEG" else "png"
    image_paths = []
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(output_dir, f"page_{page_number}.{ext}")
        image.save(image_path, image_format)
        image_paths.append(image_path)

    return image_paths


def images_to_pdf(image_files):
    """Combine images into a single PDF, one image per page.

    Args:
        image_files: Uploaded image files, in the desired page order.

    Returns:
        Path of the generated PDF (named ``images_to_pdf.pdf``).

    Raises:
        gr.Error: If no images were supplied.
    """
    if not image_files:
        raise gr.Error("Please upload at least one image.")

    output_path = _new_output_path("images_to_pdf.pdf")
    pages = [Image.open(_file_path(image)).convert("RGB") for image in image_files]
    pages[0].save(output_path, save_all=True, append_images=pages[1:])
    return output_path


def images_to_zip(image_paths):
    """Pack the given image files into a zip archive and return its path.

    Each image is stored under its base name only (no directory part).
    """
    zip_file_path = _new_output_path("images.zip")
    with zipfile.ZipFile(zip_file_path, "w") as zipf:
        for image_path in image_paths:
            zipf.write(image_path, os.path.basename(image_path))
    return zip_file_path


def pdf_to_docx(pdf_file):
    """Convert a PDF to DOCX and return the path of ``converted.docx``."""
    output_path = _new_output_path("converted.docx")
    converter = Converter(_file_path(pdf_file))
    try:
        converter.convert(output_path)
    finally:
        # Release the converter even when the conversion fails.
        converter.close()
    return output_path


def cleanup_temp_files():
    """Delete the app's private working directory and everything in it."""
    # Best-effort: a failure to delete must not mask the real exit reason.
    shutil.rmtree(WORK_DIR, ignore_errors=True)


# Registered before the (blocking) launch so cleanup is armed from the start.
atexit.register(cleanup_temp_files)


# Create Gradio interface
with gr.Blocks(theme="gstaff/xkcd") as demo:
    gr.Markdown("# PDF Merger and Converter")

    with gr.Tabs():
        with gr.TabItem("PDF Merger"):
            pdf_input = gr.File(
                label="Upload PDF Files to Merge",
                file_types=[".pdf"],
                file_count="multiple",
            )
            order_input = gr.Textbox(
                label="Enter the order of PDFs as comma-separated numbers, skip the number if you want to skip the file",
                placeholder="1,2,3,... or 3,1,2",
            )
            with gr.Row():
                merge_button = gr.Button("Merge PDFs (Normal)")
                merge_odd_button = gr.Button("Merge PDFs (Each PDF starts on odd page)")
            merged_result = gr.File(label="Download Merged PDF")

            def merge_and_preview(pdf_files, order, start_on_odd=False):
                """Validate the requested order, merge the PDFs, return the result path.

                Raises:
                    gr.Error: If nothing was uploaded, the order text is not
                        comma-separated integers, a value is outside 0..n,
                        or every file is skipped.
                """
                if not pdf_files:
                    raise gr.Error("Please upload at least one PDF file.")

                n = len(pdf_files)
                if not order or not order.strip():
                    # Default to natural order if order is empty
                    positions = list(range(1, n + 1))
                else:
                    try:
                        positions = [int(x.strip()) for x in order.split(",")]
                    except ValueError:
                        raise gr.Error("Invalid order format. Ensure it is comma-separated numbers.")

                # Ensure the order does not reference non-existing files
                if any(i < 0 or i > n for i in positions):
                    raise gr.Error(f"Order values must be between 0 and {n} (0 means to skip the file).")

                if not any(positions):
                    raise gr.Error("All files are marked as skipped; nothing to merge.")

                return merge_pdfs(pdf_files, positions, start_on_odd)

            def merge_normal(pdf_files, order):
                """Merge without blank-page padding."""
                return merge_and_preview(pdf_files, order, False)

            def merge_odd(pdf_files, order):
                """Merge so that each PDF after the first starts on an odd page."""
                return merge_and_preview(pdf_files, order, True)

            merge_button.click(
                merge_normal,
                inputs=[pdf_input, order_input],
                outputs=[merged_result],
            )
            merge_odd_button.click(
                merge_odd,
                inputs=[pdf_input, order_input],
                outputs=[merged_result],
            )

        with gr.TabItem("PDF to Image Converter"):
            single_pdf_input = gr.File(
                label="Upload PDF File to Convert",
                file_types=[".pdf"],
                file_count="single",
            )
            image_format_option = gr.Radio(
                label="Select Image Format",
                choices=["JPEG", "PNG"],
                value="JPEG",
            )
            image_output = gr.Gallery(label="Converted Images", show_label=True)
            download_zip_button = gr.Button("Download All Images as ZIP")
            zip_result = gr.File(label="Download ZIP")

            def convert_pdf_to_images_with_format(pdf_file, image_format):
                """Render the uploaded PDF for the gallery preview."""
                # The change event also fires when the upload is cleared.
                if pdf_file is None:
                    return []
                return pdf_to_images(pdf_file, image_format)

            def download_images_as_zip_with_format(pdf_file, image_format):
                """Render the uploaded PDF and return a zip of the page images."""
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                return images_to_zip(pdf_to_images(pdf_file, image_format))

            single_pdf_input.change(
                convert_pdf_to_images_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[image_output],
            )
            download_zip_button.click(
                download_images_as_zip_with_format,
                inputs=[single_pdf_input, image_format_option],
                outputs=[zip_result],
            )

        with gr.TabItem("Image to PDF Converter"):
            image_input = gr.File(
                label="Upload Images to Convert to PDF",
                file_types=[".jpg", ".png"],
                file_count="multiple",
            )
            order_option = gr.Radio(
                label="Select Order Type",
                choices=["Ordered", "Reverse", "Custom"],
                value="Ordered",
            )
            custom_order_input = gr.Textbox(
                label="Enter custom order (comma-separated indices)",
                visible=False,
            )
            image_gallery = gr.Gallery(label="Images Preview (Arrange Order)", show_label=True)
            pdf_result = gr.File(label="Download PDF")

            def update_custom_order_visibility(order_type):
                """Show the custom-order textbox only for the "Custom" order type."""
                return gr.update(visible=(order_type == "Custom"))

            def sort_images(order_type, custom_order, images):
                """Return ``images`` arranged according to ``order_type``.

                "Reverse" reverses the upload order, "Custom" applies the
                1-based comma-separated indices in ``custom_order``, and any
                other value keeps the upload order.

                Raises:
                    gr.Error: If no images were uploaded, or the custom order
                        is not a list of integers within 1..len(images).
                """
                if not images:
                    raise gr.Error("Please upload at least one image.")

                if order_type == "Reverse":
                    return images[::-1]

                if order_type == "Custom":
                    try:
                        indices = [int(i.strip()) - 1 for i in (custom_order or "").split(",")]
                    except ValueError:
                        indices = None
                    # Reject out-of-range entries explicitly: a bare lookup
                    # would let 0 wrap around to the last image.
                    if indices is None or any(i < 0 or i >= len(images) for i in indices):
                        raise gr.Error("Invalid custom order. Ensure all indices are valid and within range.")
                    return [images[i] for i in indices]

                return images

            def preview_sorted_images(order_type, custom_order, images):
                """Return the sorted images as file paths for the gallery."""
                return [_file_path(image) for image in sort_images(order_type, custom_order, images)]

            def generate_pdf(order_type, custom_order, images):
                """Sort the images as requested and build the PDF from them."""
                return images_to_pdf(sort_images(order_type, custom_order, images))

            order_option.change(
                update_custom_order_visibility,
                inputs=[order_option],
                outputs=[custom_order_input],
            )

            gr.Button("Preview Sorted Images").click(
                preview_sorted_images,
                inputs=[order_option, custom_order_input, image_input],
                outputs=[image_gallery],
            )

            gr.Button("Generate PDF").click(
                generate_pdf,
                inputs=[order_option, custom_order_input, image_input],
                outputs=[pdf_result],
            )

        with gr.TabItem("PDF to DOCX Converter"):
            gr.Markdown("Some PDF files may not be converted properly due to the complexity of the PDF file")
            pdf_to_docx_input = gr.File(
                label="Upload PDF File to Convert to DOCX",
                file_types=[".pdf"],
                file_count="single",
            )
            convert_button = gr.Button("Convert to DOCX")
            docx_result = gr.File(label="Download DOCX")

            def convert_pdf_to_docx_with_button(pdf_file):
                """Convert the uploaded PDF to DOCX for download."""
                if pdf_file is None:
                    raise gr.Error("Please upload a PDF file first.")
                return pdf_to_docx(pdf_file)

            convert_button.click(
                convert_pdf_to_docx_with_button,
                inputs=[pdf_to_docx_input],
                outputs=[docx_result],
            )

# Launch the Gradio app
demo.launch()