ShadowDominator committed on
Commit
febd231
·
1 Parent(s): 83cbf17

Upload 4 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. 1706.03762.pdf +3 -0
  3. app.py +108 -0
  4. packages.txt +1 -0
  5. requirements.txt +2 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ 1706.03762.pdf filter=lfs diff=lfs merge=lfs -text
1706.03762.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfaaec89262875f927cf1b38b2da2d775f3309b7bea3537f29b606ca67e79065
3
+ size 2201700
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from pdf2image import convert_from_path,pdfinfo_from_path
4
+ import zipfile
5
+
6
def zip_folder(folder_path, output_path):
    """Write every file under ``folder_path`` into a ZIP archive.

    The directory tree is walked recursively and each file is stored under
    its path relative to ``folder_path``, compressed with DEFLATE.

    Args:
        folder_path: Directory whose files are archived.
        output_path: Path of the ZIP file to create (opened in ``'w'`` mode,
            so an existing file at that path is replaced).
    """
    # The archive file is created as soon as ZipFile opens it, i.e. before the
    # walk starts. If it lives inside ``folder_path`` the walk would list it
    # and the half-written archive would be added to itself, so skip it.
    archive_abspath = os.path.abspath(output_path)
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.abspath(file_path) == archive_abspath:
                    continue
                zipf.write(file_path, os.path.relpath(file_path, folder_path))
12
+
13
+
14
+
15
# Working directories, relative to the current working directory:
# DIRECTORY receives the rendered page PNGs, DIRECTORY_OUTPUT the ZIP archive.
DIRECTORY = "image_reference"
DIRECTORY_OUTPUT = "output"
DIRECTORIES = [DIRECTORY, DIRECTORY_OUTPUT]

# Make sure the working directories exist. ``exist_ok=True`` lets makedirs
# handle the "already present" case itself, so there is no gap between an
# existence check and the creation; it still raises if the path exists but is
# not a directory.
for directory in DIRECTORIES:
    os.makedirs(directory, exist_ok=True)
25
+
26
+
27
# File-name suffixes (compared case-insensitively) that count as images.
ALLOWED_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif']


def get_image_files(directory):
    """Return the paths of the image files located directly in ``directory``.

    A file qualifies when its lower-cased name ends with one of
    ``ALLOWED_EXTENSIONS``. Sub-directories are not descended into.

    Args:
        directory: Directory to scan.

    Returns:
        A list of ``os.path.join(directory, name)`` paths, in the order
        ``os.listdir`` reports the entries.
    """
    # str.endswith needs a tuple; build it once instead of once per entry.
    suffixes = tuple(ALLOWED_EXTENSIONS)
    image_files = []
    for filename in os.listdir(directory):
        if not filename.lower().endswith(suffixes):
            continue
        filepath = os.path.join(directory, filename)
        # A sub-directory may carry an image-like name; only real files
        # (or links to files) are usable as images.
        if os.path.isfile(filepath):
            image_files.append(filepath)
    return image_files
35
+
36
+
37
def clear_directory(directory):
    """Delete everything inside ``directory`` while keeping the directory.

    Files and symbolic links are unlinked; sub-directories are removed
    together with their contents. Deletion is best-effort: an entry that
    cannot be removed is reported on stdout and the remaining entries are
    still processed.

    Args:
        directory: Directory whose contents are removed.
    """
    # Imported here so the function does not rely on a module-level import.
    import shutil

    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        try:
            # Links are checked before isdir() so that a link pointing at a
            # directory is unlinked rather than followed.
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                # os.rmdir only removes empty directories; rmtree also
                # handles sub-directories that still contain files.
                shutil.rmtree(file_path)
        except OSError as e:
            print(f"Failed to delete {file_path}. Reason: {e}")
47
+
48
+
49
+
50
def extract_photos_from_pdf(file_pdf):
    """Render each page of a PDF to a PNG and bundle the PNGs into a ZIP.

    Both working directories are emptied first. Pages are rendered in
    batches and saved as ``<page number>.png`` in ``DIRECTORY``; the whole
    directory is then zipped to ``DIRECTORY_OUTPUT/all_photos.zip``.

    Args:
        file_pdf: Object whose ``name`` attribute is the path of the PDF
            (``None`` when nothing was uploaded).

    Returns:
        A ``(gallery_update, file_update)`` tuple. On success the gallery
        lists ``(path, file name)`` pairs sorted by page number and the file
        component points at the ZIP. On any failure the gallery is emptied
        and labelled "Error" and the file component is hidden.
    """
    clear_directory(DIRECTORY)
    clear_directory(DIRECTORY_OUTPUT)
    try:
        pdf_path = file_pdf.name
        info = pdfinfo_from_path(pdf_path, userpw=None, poppler_path=None)

        total_pages = info["Pages"]  # Total number of pages in the PDF
        batch_size = 100  # Number of pages rendered per convert_from_path call

        # pdf2image numbers pages from 1 and treats last_page as inclusive,
        # so each batch covers [first_page, first_page + batch_size - 1].
        # Starting at 0 with an end of start + batch_size made consecutive
        # batches overlap by one page and shifted the file names of every
        # page after the first batch.
        for first_page in range(1, total_pages + 1, batch_size):
            last_page = min(first_page + batch_size - 1, total_pages)
            images = convert_from_path(pdf_path, first_page=first_page, last_page=last_page)
            for page_number, image in enumerate(images, start=first_page):
                image.save(f'{DIRECTORY}/{page_number}.png', 'PNG')

        images_pdf_list = get_image_files(DIRECTORY)
        image_names = [(path, os.path.basename(path)) for path in images_pdf_list]
        # File names are "<page number>.png"; sort numerically, not lexically.
        sorted_names = sorted(image_names, key=lambda x: int(x[1].split('.')[0]))
        zip_path = f'{DIRECTORY_OUTPUT}/all_photos.zip'
        zip_folder(DIRECTORY, zip_path)
        page_count = len(images_pdf_list)
        return (
            gr.Gallery.update(value=sorted_names, label=f"Detected {page_count} Page{'' if page_count == 1 else 's'}", show_label=True, visible=True),
            gr.File.update(value=zip_path, visible=True)
        )
    except Exception as e:
        # Top-level UI boundary: keep the app responsive, but report the
        # cause instead of swallowing it (a bare except would also trap
        # KeyboardInterrupt/SystemExit).
        print(f"Failed to extract pages from PDF. Reason: {e}")
        return (
            gr.Gallery.update(value=[], label="Error", show_label=True, visible=True),
            gr.File.update(visible=False)
        )
79
+
80
# Gradio UI: an upload area with a trigger button, plus a gallery and a
# download component that extract_photos_from_pdf fills in.
# NOTE(review): indentation is not preserved in these lines, so the exact
# nesting of the `with` containers below cannot be read from here - confirm
# against the real app.py before restructuring.
+ with gr.Blocks() as demo:
81
+ with gr.Tabs() as tabs:
82
+
83
+ with gr.TabItem("PDF",id=0):
84
+
85
+ with gr.Row():
86
+ with gr.Column():
87
# Hidden, empty text component; nothing in the visible code reads or updates it.
+ proegres = gr.Text(show_label=False,value="",visible=False)
88
# Upload widget restricted to '.pdf' files; its value is the callback's only input.
+ file_pdf = gr.File(file_types=['.pdf'], label="Upload PDF *")
89
+ btn = gr.Button("Extract Photos from PDF")
90
+
91
+
92
+ with gr.Tabs(visible=True) as tabs_under:
93
+
94
+ with gr.TabItem("Photos",id=0):
95
+
96
+ with gr.Column():
97
+
98
# Output components: the gallery starts empty, the download file starts hidden
# and is made visible by the callback's gr.File.update(..., visible=True).
+ list_image = gr.Gallery(value=[], label=f"0 Page",visible=True, show_label=True, elem_id="gallery").style(columns=[3], object_fit="cover", height="auto")
99
+ file_download = gr.File(file_types=['.zip'], label="Download File",visible=False)
100
+
101
+
102
# NOTE(review): the example row has two entries while `inputs` lists a single
# component - confirm the trailing None is intended.
+ examples = gr.Examples([["./1706.03762.pdf", None]], fn=extract_photos_from_pdf,inputs=[file_pdf],outputs=[list_image,file_download], cache_examples=False)
103
# Clicking the button runs the extraction on the uploaded file and updates both outputs.
+ btn.click(fn=extract_photos_from_pdf,inputs=[file_pdf],outputs=[list_image,file_download])
104
+
105
+
106
+
107
+
108
# Calls queue() on the Blocks app and then launch(); this runs whenever the module is executed.
+ demo.queue().launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ poppler-utils
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio==3.32.0
2
+ pdf2image==1.16.3