Spaces:

bacngv
/

PDF2TEXT

Sleeping

App Files Files Community

PDF2TEXT / app.py

bacngv

Update app.py

bfece13 verified 2 months ago

raw

history blame

1.72 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForObjectDetection, TableTransformerForObjectDetection
	from PIL import Image
	from pdf_processing import process_pdf
	from io_utils import save_remaining_text_to_txt, save_to_csv

	# Process PDF file and save text or data to file
	def process_pdf_file(pdf_file):
	    """Process an uploaded PDF and return the paths of the generated files.

	    Args:
	        pdf_file: The value Gradio hands to the callback. With
	            ``gr.File(type="filepath")`` this is a plain ``str`` path; older
	            Gradio file modes pass a temp-file wrapper exposing ``.name``.
	            ``None`` is received when the upload is cleared (the interface
	            runs with ``live=True``).

	    Returns:
	        A ``(txt_file_path, csv_file_path)`` tuple for the two download
	        components, or ``(None, None)`` when no file has been uploaded.
	    """
	    # Function-scope import keeps this block self-contained.
	    import os

	    # Nothing uploaded (or the upload was cleared): leave both outputs empty
	    # instead of crashing on attribute access.
	    if pdf_file is None:
	        return None, None

	    # A str / path-like is already the path. Do NOT use ``.name`` on those:
	    # str has no such attribute and pathlib's ``.name`` is only the basename.
	    if isinstance(pdf_file, (str, os.PathLike)):
	        pdf_path = os.fspath(pdf_file)
	    else:
	        pdf_path = pdf_file.name  # legacy file-object input

	    output_folder = "./output"
	    # Make sure the destination exists before the helpers write into it.
	    os.makedirs(output_folder, exist_ok=True)

	    # NOTE(review): the result of process_pdf is treated as the text to save;
	    # confirm against pdf_processing.process_pdf.
	    processed_text = process_pdf(pdf_path, output_folder)

	    # Third argument is a fixed 1 (page number placeholder in the original).
	    txt_file_path = save_remaining_text_to_txt(processed_text, output_folder, 1)

	    # Placeholder: no tabular data is collected yet, so an empty list is saved.
	    data = []
	    csv_file_path = save_to_csv(data, output_folder, 'processed_data.csv')

	    return txt_file_path, csv_file_path

	# Create Gradio interface
	def create_gradio_interface():
	    """Assemble the Gradio interface for the PDF processing app.

	    One file-upload input is wired to ``process_pdf_file``; its two return
	    values feed two non-interactive file components used for downloads.

	    Returns:
	        The configured ``gr.Interface`` (not yet launched).
	    """
	    # Input: the upload widget delivers the file to the callback as a path.
	    pdf_upload = gr.File(label="Upload PDF", type="filepath")

	    # Outputs, in the same order as the callback's return tuple.
	    download_slots = [
	        gr.File(label="Download Processed Text", interactive=False),
	        gr.File(label="Download Processed CSV", interactive=False),
	    ]

	    # live=True re-runs the callback whenever the input changes.
	    return gr.Interface(
	        fn=process_pdf_file,
	        inputs=pdf_upload,
	        outputs=download_slots,
	        live=True,
	    )

	# Start the application
	if __name__ == "__main__":
	    # Build the UI and serve it; runs only when executed as a script.
	    create_gradio_interface().launch()