Spaces:
Running
on
Zero
Running
on
Zero
ferferefer
committed on
Commit
·
6189ad0
1
Parent(s):
b711628
pdf?
Browse files
- app.py +46 -12
- requirements.txt +2 -2
app.py
CHANGED
@@ -42,17 +42,46 @@ processor = Qwen2VLProcessor.from_pretrained(model_id)
|
|
42 |
|
43 |
def process_uploaded_file(file_obj):
|
44 |
"""Process uploaded file whether it's an image or PDF"""
|
45 |
-
# Get the file extension
|
46 |
file_extension = Path(file_obj.name).suffix.lower()
|
47 |
|
48 |
try:
|
49 |
if file_extension == '.pdf':
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
else:
|
54 |
# Handle regular image files
|
55 |
-
|
|
|
|
|
|
|
|
|
56 |
except Exception as e:
|
57 |
raise Exception(f"Error processing file {file_obj.name}: {str(e)}")
|
58 |
|
@@ -78,21 +107,26 @@ def run_example(files, text_input=None):
|
|
78 |
for file in files:
|
79 |
try:
|
80 |
images = process_uploaded_file(file)
|
81 |
-
|
|
|
82 |
except Exception as e:
|
83 |
return f"Error processing file {file.name}: {str(e)}"
|
84 |
|
85 |
if not processed_images:
|
86 |
-
return "No valid images were processed. Please check your files."
|
87 |
|
88 |
# Save processed images temporarily
|
89 |
image_paths = []
|
90 |
for idx, img in enumerate(processed_images):
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
|
97 |
# Create messages with multiple images
|
98 |
messages = [
|
|
|
42 |
|
43 |
def process_uploaded_file(file_obj):
    """Load an uploaded image or PDF and return its content as a list of images.

    Args:
        file_obj: Uploaded-file object; only its ``name`` attribute (the path
            of the file on disk) is read.

    Returns:
        A list of images: whatever ``pdf2image.convert_from_path`` returns for
        a ``.pdf`` upload, or a single-element list holding the opened image
        for any other extension.

    Raises:
        Exception: On any failure. The message has the form
            ``"Error processing file <name>: PDF processing error: <detail>"``
            or ``"Error processing file <name>: Image processing error: <detail>"``
            and the original exception is attached as ``__cause__``.
    """
    source_path = file_obj.name
    file_extension = Path(source_path).suffix.lower()

    try:
        if file_extension == '.pdf':
            try:
                # Convert straight from the uploaded path; copying the PDF to
                # a temporary directory first only added an extra full read
                # and write of the file.
                return pdf2image.convert_from_path(
                    source_path,
                    poppler_path=None,  # Will use system poppler if available
                    dpi=200,  # Adjust DPI as needed
                    fmt='PNG'
                )
            except Exception as pdf_error:
                # Swap poppler-related failures for an actionable install hint.
                if "poppler" in str(pdf_error).lower():
                    detail = (
                        "PDF processing requires poppler to be installed. "
                        "Please install poppler-utils package on your system. "
                        "On Ubuntu/Debian: sudo apt-get install -y poppler-utils"
                    )
                else:
                    detail = str(pdf_error)
                raise Exception(f"PDF processing error: {detail}") from pdf_error

        # Handle regular image files
        try:
            # Image.open() is lazy: force the pixel data to be decoded now so
            # a corrupt file is reported here rather than at a later save(),
            # and let the context manager release the file handle.
            with Image.open(source_path) as img:
                img.load()
            return [img]
        except Exception as e:
            raise Exception(f"Image processing error: {str(e)}") from e
    except Exception as e:
        raise Exception(f"Error processing file {file_obj.name}: {str(e)}") from e
|
87 |
|
|
|
107 |
for file in files:
|
108 |
try:
|
109 |
images = process_uploaded_file(file)
|
110 |
+
if images: # Only add if we got valid images
|
111 |
+
processed_images.extend(images)
|
112 |
except Exception as e:
|
113 |
return f"Error processing file {file.name}: {str(e)}"
|
114 |
|
115 |
if not processed_images:
|
116 |
+
return "No valid images were processed. Please check your files and ensure they are valid image or PDF files."
|
117 |
|
118 |
# Save processed images temporarily
|
119 |
image_paths = []
|
120 |
for idx, img in enumerate(processed_images):
|
121 |
+
try:
|
122 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
123 |
+
filename = f"temp_image_{timestamp}_{idx}.png"
|
124 |
+
img.save(filename)
|
125 |
+
image_paths.append(filename)
|
126 |
+
temp_paths.append(filename)
|
127 |
+
except Exception as e:
|
128 |
+
cleanup_temp_files(temp_paths) # Clean up any files we've created so far
|
129 |
+
return f"Error saving processed image: {str(e)}"
|
130 |
|
131 |
# Create messages with multiple images
|
132 |
messages = [
|
requirements.txt
CHANGED
@@ -7,6 +7,6 @@ git+https://github.com/huggingface/transformers.git
|
|
7 |
accelerate
|
8 |
qwen-vl-utils
|
9 |
peft
|
10 |
-
pdf2image
|
11 |
-
pypdf2
|
12 |
gradio>=4.0.0
|
|
|
7 |
accelerate
|
8 |
qwen-vl-utils
|
9 |
peft
|
10 |
+
pdf2image>=1.16.3
|
11 |
+
pypdf2>=3.0.0
|
12 |
gradio>=4.0.0
|