davanstrien HF staff committed on
Commit
1af90f1
·
1 Parent(s): 78b9829
Files changed (1) hide show
  1. app.py +12 -22
app.py CHANGED
@@ -31,7 +31,9 @@ def process_pdf(pdf_file, sample_size, temp_dir, progress=gr.Progress()):
31
  )
32
 
33
  images = []
34
- for i, page_num in enumerate(selected_pages):
 
 
35
  page = doc[page_num]
36
  pix = page.get_pixmap()
37
  image_path = os.path.join(
@@ -39,10 +41,6 @@ def process_pdf(pdf_file, sample_size, temp_dir, progress=gr.Progress()):
39
  )
40
  pix.save(image_path)
41
  images.append(image_path)
42
- progress(
43
- (i + 1) / len(selected_pages),
44
- desc=f"Converting {os.path.basename(pdf_path)}",
45
- )
46
 
47
  doc.close()
48
  return images, None
@@ -58,23 +56,15 @@ def pdf_to_images(pdf_files, sample_size, temp_dir, progress=gr.Progress()):
58
  all_images = []
59
  skipped_pdfs = []
60
 
61
- with ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
62
- future_to_pdf = {
63
- executor.submit(
64
- process_pdf, pdf_file, sample_size, temp_dir, progress
65
- ): pdf_file
66
- for pdf_file in pdf_files
67
- }
68
-
69
- for i, future in enumerate(as_completed(future_to_pdf)):
70
- pdf_file = future_to_pdf[future]
71
- images, error = future.result()
72
- if error:
73
- skipped_pdfs.append(pdf_file.name)
74
- gr.Info(error)
75
- else:
76
- all_images.extend(images)
77
- progress((i + 1) / len(pdf_files), desc="Converting PDFs")
78
 
79
  message = f"Saved {len(all_images)} images to temporary directory"
80
  if skipped_pdfs:
 
31
  )
32
 
33
  images = []
34
+ for page_num in progress.tqdm(
35
+ selected_pages, desc=f"Converting {os.path.basename(pdf_path)}", unit="page"
36
+ ):
37
  page = doc[page_num]
38
  pix = page.get_pixmap()
39
  image_path = os.path.join(
 
41
  )
42
  pix.save(image_path)
43
  images.append(image_path)
 
 
 
 
44
 
45
  doc.close()
46
  return images, None
 
56
  all_images = []
57
  skipped_pdfs = []
58
 
59
+ for i, pdf_file in enumerate(
60
+ progress.tqdm(pdf_files, desc="Converting PDFs", unit="PDF")
61
+ ):
62
+ images, error = process_pdf(pdf_file, sample_size, temp_dir, progress)
63
+ if error:
64
+ skipped_pdfs.append(pdf_file.name)
65
+ gr.Info(error)
66
+ else:
67
+ all_images.extend(images)
 
 
 
 
 
 
 
 
68
 
69
  message = f"Saved {len(all_images)} images to temporary directory"
70
  if skipped_pdfs: