Update app.py
Browse files
app.py
CHANGED
@@ -3,27 +3,37 @@ import torch
|
|
3 |
from transformers import AutoModelForObjectDetection, TableTransformerForObjectDetection
|
4 |
from PIL import Image
|
5 |
from pdf_processing import process_pdf
|
|
|
6 |
|
7 |
-
#
|
8 |
def process_pdf_file(pdf_file):
|
9 |
pdf_path = pdf_file.name # Get the path to the uploaded PDF
|
10 |
output_folder = "./output" # Define the output folder
|
11 |
|
12 |
-
#
|
13 |
-
process_pdf(pdf_path, output_folder)
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Create Gradio interface
|
18 |
def create_gradio_interface():
|
19 |
input_pdf = gr.File(label="Upload PDF", type="filepath") # Fixed argument here
|
20 |
-
|
|
|
21 |
|
22 |
# Define the interface
|
23 |
interface = gr.Interface(
|
24 |
fn=process_pdf_file,
|
25 |
inputs=input_pdf,
|
26 |
-
outputs=
|
27 |
live=True
|
28 |
)
|
29 |
return interface
|
|
|
3 |
from transformers import AutoModelForObjectDetection, TableTransformerForObjectDetection
|
4 |
from PIL import Image
|
5 |
from pdf_processing import process_pdf
|
6 |
+
from io_utils import save_remaining_text_to_txt, save_to_csv
|
7 |
|
8 |
+
# Process PDF file and save text or data to file
|
9 |
def process_pdf_file(pdf_file):
    """Process an uploaded PDF and return paths to the generated output files.

    Args:
        pdf_file: The value handed over by the Gradio ``File`` input. With
            ``type="filepath"`` this is a plain ``str`` path; older Gradio
            versions pass a tempfile-like object exposing ``.name``. ``None``
            is received when the upload widget is cleared (the interface
            runs with ``live=True``).

    Returns:
        A ``(txt_file_path, csv_file_path)`` tuple for the two ``File``
        outputs, or ``(None, None)`` when no file was supplied.
    """
    import os

    # A cleared upload triggers the callback with None; nothing to process.
    if pdf_file is None:
        return None, None

    # Accept both a path string and a file-like object carrying ``.name``.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    output_folder = "./output"  # Define the output folder

    # Make sure the destination exists before any helper writes into it.
    os.makedirs(output_folder, exist_ok=True)

    # Process the PDF file (assuming the function does the necessary work)
    processed_text = process_pdf(pdf_path, output_folder)  # Modify this based on your function's output

    # Save processed text to a text file
    txt_file_path = save_remaining_text_to_txt(processed_text, output_folder, 1)  # Assuming page_num = 1 for now

    # Alternatively, you could generate CSV or any other output
    data = []  # Replace with your data
    csv_file_path = save_to_csv(data, output_folder, 'processed_data.csv')

    # Return file paths as outputs for Gradio
    return txt_file_path, csv_file_path
|
25 |
|
26 |
# Create Gradio interface
|
27 |
def create_gradio_interface():
    """Build and return the Gradio interface for the PDF processing app.

    The interface takes one uploaded PDF, feeds it to ``process_pdf_file``,
    and exposes the two resulting files (text and CSV) as read-only
    download components. ``live=True`` makes the callback run whenever the
    input changes.
    """
    return gr.Interface(
        fn=process_pdf_file,
        inputs=gr.File(label="Upload PDF", type="filepath"),  # Fixed argument here
        outputs=[
            gr.File(label="Download Processed Text", interactive=False),
            gr.File(label="Download Processed CSV", interactive=False),
        ],
        live=True
    )
|