|
import gradio as gr |
|
import torch |
|
from transformers import AutoModelForObjectDetection, TableTransformerForObjectDetection |
|
from PIL import Image |
|
from pdf_processing import process_pdf |
|
from io_utils import save_remaining_text_to_txt, save_to_csv |
|
|
|
|
|
def process_pdf_file(pdf_file):
    """Run the PDF pipeline on an uploaded file and return the result file paths.

    Args:
        pdf_file: The value delivered by the Gradio upload component. The
            component in this file is declared with ``type="filepath"``, which
            delivers a plain path string; objects exposing a ``.name`` path
            attribute (the older tempfile-style wrapper) are accepted too.
            ``None`` means no file is currently selected.

    Returns:
        A ``(txt_file_path, csv_file_path)`` tuple holding whatever
        ``save_remaining_text_to_txt`` and ``save_to_csv`` return, or
        ``(None, None)`` when no file was supplied.
    """
    import os

    # A live interface may invoke the callback with an empty upload; there is
    # nothing to process, so hand back empty outputs instead of crashing.
    if pdf_file is None:
        return None, None

    # Path strings / PathLike objects have no usable ``.name`` path attribute
    # (``str`` has none at all, ``pathlib.Path.name`` is only the basename),
    # so resolve them directly and fall back to ``.name`` for wrappers.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = os.fspath(pdf_file)
    else:
        pdf_path = pdf_file.name

    output_folder = "./output"
    # The helpers are defined elsewhere; make sure the folder exists rather
    # than rely on them to create it.
    os.makedirs(output_folder, exist_ok=True)

    processed_text = process_pdf(pdf_path, output_folder)

    # NOTE(review): the meaning of the third argument (1) is not visible
    # here — presumably a page or file index; confirm against io_utils.
    txt_file_path = save_remaining_text_to_txt(processed_text, output_folder, 1)

    # NOTE(review): no rows are collected from the pipeline, so the CSV is
    # written from an empty list — confirm whether table data should be
    # passed through here.
    data = []
    csv_file_path = save_to_csv(data, output_folder, 'processed_data.csv')

    return txt_file_path, csv_file_path
|
|
|
|
|
def create_gradio_interface():
    """Build the Gradio interface that wires a PDF upload to two downloads.

    Returns:
        A ``gr.Interface`` whose callback is ``process_pdf_file``, with one
        file input and two non-interactive file outputs (text and CSV).
    """
    # Components are created in the same order as before: upload first,
    # then the text download, then the CSV download.
    upload = gr.File(label="Upload PDF", type="filepath")
    downloads = [
        gr.File(label="Download Processed Text", interactive=False),
        gr.File(label="Download Processed CSV", interactive=False),
    ]

    return gr.Interface(
        fn=process_pdf_file,
        inputs=upload,
        outputs=downloads,
        live=True,
    )
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the interface and start the Gradio server.
    create_gradio_interface().launch()