Spaces:
Running
Running
IsakNordgren
committed on
Commit
·
0e6c3f4
1
Parent(s):
aaaf3f2
display uploaded pdf
Browse files
- Examples/test.txt +0 -0
- __pycache__/page.cpython-310.pyc +0 -0
- __pycache__/summarize.cpython-310.pyc +0 -0
- page.py +15 -2
- requirements.txt +1 -0
- summarize.py +22 -4
Examples/test.txt
DELETED
File without changes
|
__pycache__/page.cpython-310.pyc
CHANGED
Binary files a/__pycache__/page.cpython-310.pyc and b/__pycache__/page.cpython-310.pyc differ
|
|
__pycache__/summarize.cpython-310.pyc
CHANGED
Binary files a/__pycache__/summarize.cpython-310.pyc and b/__pycache__/summarize.cpython-310.pyc differ
|
|
page.py
CHANGED
@@ -1,8 +1,14 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
2 |
from summarize import Summarizer
|
3 |
import pdfplumber
|
4 |
|
5 |
def createDemoPage(path):
|
|
|
|
|
|
|
|
|
6 |
with st.spinner("Summarizing text..."):
|
7 |
summarizer = Summarizer(model = "groq")
|
8 |
text = readpdf("Examples/Kris.pdf")
|
@@ -11,8 +17,15 @@ def createDemoPage(path):
|
|
11 |
st.subheader("Summary")
|
12 |
st.write(summary)
|
13 |
|
14 |
-
st.subheader("Extracted Text")
|
15 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
def readpdf(path):
|
18 |
text = ""
|
|
|
1 |
import streamlit as st
|
2 |
+
from streamlit import session_state as ss
|
3 |
+
from streamlit_pdf_viewer import pdf_viewer
|
4 |
from summarize import Summarizer
|
5 |
import pdfplumber
|
6 |
|
7 |
def createDemoPage(path):
|
8 |
+
# For displaying pdf
|
9 |
+
if 'pdf_ref' not in ss:
|
10 |
+
ss.pdf_ref = None
|
11 |
+
|
12 |
with st.spinner("Summarizing text..."):
|
13 |
summarizer = Summarizer(model = "groq")
|
14 |
text = readpdf("Examples/Kris.pdf")
|
|
|
17 |
st.subheader("Summary")
|
18 |
st.write(summary)
|
19 |
|
20 |
+
#st.subheader("Extracted Text")
|
21 |
+
with st.expander("Extracted Text", expanded = False):
|
22 |
+
st.write(text)
|
23 |
+
|
24 |
+
st.subheader("Original pdf")
|
25 |
+
with open(path, 'rb') as pdf_ref:
|
26 |
+
bytes_data = pdf_ref.read()
|
27 |
+
pdf_viewer(input=bytes_data, width=700)
|
28 |
+
|
29 |
|
30 |
def readpdf(path):
|
31 |
text = ""
|
requirements.txt
CHANGED
@@ -6,3 +6,4 @@ transformers
|
|
6 |
torch
|
7 |
groq
|
8 |
python-dotenv
|
|
|
|
6 |
torch
|
7 |
groq
|
8 |
python-dotenv
|
9 |
+
streamlit_pdf_viewer
|
summarize.py
CHANGED
@@ -15,18 +15,31 @@ from transformers import AutoTokenizer
|
|
15 |
import transformers
|
16 |
import torch
|
17 |
|
|
|
|
|
|
|
18 |
class Summarizer:
|
19 |
|
20 |
def __init__(self, model = "groq"):
|
21 |
self.model = model
|
22 |
|
23 |
def run_app(self):
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
|
26 |
if uploaded_file is not None:
|
|
|
27 |
if uploaded_file.type == "application/pdf":
|
28 |
with st.spinner("Extracting text from PDF..."):
|
29 |
text = self.extract_text_from_pdf(uploaded_file)
|
|
|
|
|
|
|
|
|
|
|
30 |
else:
|
31 |
image = Image.open(uploaded_file)
|
32 |
with st.spinner("Extracting text from image..."):
|
@@ -37,9 +50,14 @@ class Summarizer:
|
|
37 |
summary = self.summarize_using_groq(text)
|
38 |
st.subheader("Summary")
|
39 |
st.write(summary)
|
40 |
-
|
41 |
-
st.
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
|
45 |
# Function to extract text from an image
|
|
|
15 |
import transformers
|
16 |
import torch
|
17 |
|
18 |
+
from streamlit import session_state as ss
|
19 |
+
from streamlit_pdf_viewer import pdf_viewer
|
20 |
+
|
21 |
class Summarizer:
|
22 |
|
23 |
def __init__(self, model = "groq"):
|
24 |
self.model = model
|
25 |
|
26 |
def run_app(self):
|
27 |
+
# For displaying pdf
|
28 |
+
if 'pdf_ref' not in ss:
|
29 |
+
ss.pdf_ref = None
|
30 |
+
|
31 |
+
uploaded_file = st.file_uploader("Upload an Image or PDF", type=["jpg", "jpeg", "png", "pdf"], key="file")
|
32 |
|
33 |
if uploaded_file is not None:
|
34 |
+
|
35 |
if uploaded_file.type == "application/pdf":
|
36 |
with st.spinner("Extracting text from PDF..."):
|
37 |
text = self.extract_text_from_pdf(uploaded_file)
|
38 |
+
|
39 |
+
|
40 |
+
if ss.file:
|
41 |
+
ss.pdf_ref = ss.file
|
42 |
+
|
43 |
else:
|
44 |
image = Image.open(uploaded_file)
|
45 |
with st.spinner("Extracting text from image..."):
|
|
|
50 |
summary = self.summarize_using_groq(text)
|
51 |
st.subheader("Summary")
|
52 |
st.write(summary)
|
53 |
+
|
54 |
+
with st.expander("Extracted Text", expanded = False):
|
55 |
+
st.write(text)
|
56 |
+
|
57 |
+
if ss.pdf_ref:
|
58 |
+
st.subheader("Original pdf")
|
59 |
+
binary_data = ss.pdf_ref.getvalue()
|
60 |
+
pdf_viewer(input=binary_data, width=700)
|
61 |
|
62 |
|
63 |
# Function to extract text from an image
|