cstr committed on
Commit
b74b8ba
·
verified ·
1 Parent(s): ffe0a95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -31
app.py CHANGED
@@ -781,46 +781,32 @@ with gr.Blocks(css="""
781
 
782
  # PDF Processing Handlers
783
  def handle_pdf_process(pdf, fmt, ctx_size):
784
- """Process PDF and update UI state"""
785
  if not pdf:
786
- return (
787
- "Please upload a PDF file.", # progress_status
788
- "", # processed_text
789
- "", # pdf_content
790
- [], # snippets
791
- gr.update(choices=[], value=None), # snippet_selector
792
- None # download_files
793
- )
794
-
795
  try:
796
- # Extract and format text
797
  text = extract_text_from_pdf(pdf.name)
798
  if text.startswith("Error"):
799
- return (
800
- text,
801
- "",
802
- "",
803
- [],
804
- gr.update(choices=[], value=None),
805
- None
806
- )
807
-
808
- formatted_text = format_content(text, fmt)
809
  snippets_list = split_into_snippets(formatted_text, ctx_size)
810
-
811
- # Create downloadable full text
812
- with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as f:
813
- f.write(formatted_text)
814
  download_file = f.name
815
-
 
 
816
  return (
817
  f"PDF processed successfully! Generated {len(snippets_list)} snippets.",
818
- formatted_text,
819
- formatted_text,
820
  snippets_list,
821
- gr.update(choices=update_snippet_choices(snippets_list), value="Snippet 1 of " + str(len(snippets_list))),
822
- download_file # Return the file for download_full_text
823
- #[download_file]
824
  )
825
 
826
  except Exception as e:
 
781
 
782
  # PDF Processing Handlers
783
  def handle_pdf_process(pdf, fmt, ctx_size):
784
+ """Process PDF, format text, and return formatted text and snippets."""
785
  if not pdf:
786
+ return "Please upload a PDF file.", "", "", [], gr.update(choices=[], value=None), None
787
+
 
 
 
 
 
 
 
788
  try:
 
789
  text = extract_text_from_pdf(pdf.name)
790
  if text.startswith("Error"):
791
+ return text, "", "", [], gr.update(choices=[], value=None), None
792
+
793
+ # Format the text *before* splitting into snippets:
794
+ formatted_text = format_content(text, fmt) # Call format_content here!
 
 
 
 
 
 
795
  snippets_list = split_into_snippets(formatted_text, ctx_size)
796
+
797
+ with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix=f'.{fmt}') as f: # Correct suffix
798
+ f.write(formatted_text) # Write the *formatted* text
 
799
  download_file = f.name
800
+
801
+ snippet_choices = update_snippet_choices(snippets_list) # Pre-calculate choices
802
+
803
  return (
804
  f"PDF processed successfully! Generated {len(snippets_list)} snippets.",
805
+ formatted_text, # Return the *formatted* text
806
+ formatted_text, # Update the state with formatted text
807
  snippets_list,
808
+ gr.update(choices=snippet_choices, value=snippet_choices[0] if snippet_choices else None),
809
+ download_file
 
810
  )
811
 
812
  except Exception as e: