IsakNordgren committed on
Commit
0e6c3f4
·
1 Parent(s): aaaf3f2

display uploaded pdf

Browse files
Examples/test.txt DELETED
File without changes
__pycache__/page.cpython-310.pyc CHANGED
Binary files a/__pycache__/page.cpython-310.pyc and b/__pycache__/page.cpython-310.pyc differ
 
__pycache__/summarize.cpython-310.pyc CHANGED
Binary files a/__pycache__/summarize.cpython-310.pyc and b/__pycache__/summarize.cpython-310.pyc differ
 
page.py CHANGED
@@ -1,8 +1,14 @@
1
  import streamlit as st
 
 
2
  from summarize import Summarizer
3
  import pdfplumber
4
 
5
  def createDemoPage(path):
 
 
 
 
6
  with st.spinner("Summarizing text..."):
7
  summarizer = Summarizer(model = "groq")
8
  text = readpdf("Examples/Kris.pdf")
@@ -11,8 +17,15 @@ def createDemoPage(path):
11
  st.subheader("Summary")
12
  st.write(summary)
13
 
14
- st.subheader("Extracted Text")
15
- st.write(text)
 
 
 
 
 
 
 
16
 
17
  def readpdf(path):
18
  text = ""
 
1
  import streamlit as st
2
+ from streamlit import session_state as ss
3
+ from streamlit_pdf_viewer import pdf_viewer
4
  from summarize import Summarizer
5
  import pdfplumber
6
 
7
  def createDemoPage(path):
8
+ # For displaying pdf
9
+ if 'pdf_ref' not in ss:
10
+ ss.pdf_ref = None
11
+
12
  with st.spinner("Summarizing text..."):
13
  summarizer = Summarizer(model = "groq")
14
  text = readpdf("Examples/Kris.pdf")
 
17
  st.subheader("Summary")
18
  st.write(summary)
19
 
20
+ #st.subheader("Extracted Text")
21
+ with st.expander("Extracted Text", expanded = False):
22
+ st.write(text)
23
+
24
+ st.subheader("Original pdf")
25
+ with open(path, 'rb') as pdf_ref:
26
+ bytes_data = pdf_ref.read()
27
+ pdf_viewer(input=bytes_data, width=700)
28
+
29
 
30
  def readpdf(path):
31
  text = ""
requirements.txt CHANGED
@@ -6,3 +6,4 @@ transformers
6
  torch
7
  groq
8
  python-dotenv
 
 
6
  torch
7
  groq
8
  python-dotenv
9
+ streamlit_pdf_viewer
summarize.py CHANGED
@@ -15,18 +15,31 @@ from transformers import AutoTokenizer
15
  import transformers
16
  import torch
17
 
 
 
 
18
  class Summarizer:
19
 
20
  def __init__(self, model = "groq"):
21
  self.model = model
22
 
23
  def run_app(self):
24
- uploaded_file = st.file_uploader("Upload an Image or PDF", type=["jpg", "jpeg", "png", "pdf"])
 
 
 
 
25
 
26
  if uploaded_file is not None:
 
27
  if uploaded_file.type == "application/pdf":
28
  with st.spinner("Extracting text from PDF..."):
29
  text = self.extract_text_from_pdf(uploaded_file)
 
 
 
 
 
30
  else:
31
  image = Image.open(uploaded_file)
32
  with st.spinner("Extracting text from image..."):
@@ -37,9 +50,14 @@ class Summarizer:
37
  summary = self.summarize_using_groq(text)
38
  st.subheader("Summary")
39
  st.write(summary)
40
-
41
- st.subheader("Extracted Text")
42
- st.write(text)
 
 
 
 
 
43
 
44
 
45
  # Function to extract text from an image
 
15
  import transformers
16
  import torch
17
 
18
+ from streamlit import session_state as ss
19
+ from streamlit_pdf_viewer import pdf_viewer
20
+
21
  class Summarizer:
22
 
23
  def __init__(self, model = "groq"):
24
  self.model = model
25
 
26
  def run_app(self):
27
+ # For displaying pdf
28
+ if 'pdf_ref' not in ss:
29
+ ss.pdf_ref = None
30
+
31
+ uploaded_file = st.file_uploader("Upload an Image or PDF", type=["jpg", "jpeg", "png", "pdf"], key="file")
32
 
33
  if uploaded_file is not None:
34
+
35
  if uploaded_file.type == "application/pdf":
36
  with st.spinner("Extracting text from PDF..."):
37
  text = self.extract_text_from_pdf(uploaded_file)
38
+
39
+
40
+ if ss.file:
41
+ ss.pdf_ref = ss.file
42
+
43
  else:
44
  image = Image.open(uploaded_file)
45
  with st.spinner("Extracting text from image..."):
 
50
  summary = self.summarize_using_groq(text)
51
  st.subheader("Summary")
52
  st.write(summary)
53
+
54
+ with st.expander("Extracted Text", expanded = False):
55
+ st.write(text)
56
+
57
+ if ss.pdf_ref:
58
+ st.subheader("Original pdf")
59
+ binary_data = ss.pdf_ref.getvalue()
60
+ pdf_viewer(input=binary_data, width=700)
61
 
62
 
63
  # Function to extract text from an image