Spaces:
Running
Running
Create app.py
#8
by
mishrasahil934
- opened
app.py
CHANGED
@@ -1,15 +1,10 @@
|
|
1 |
-
from dotenv import load_dotenv
|
2 |
-
load_dotenv()
|
3 |
-
from tempfile import NamedTemporaryFile
|
4 |
import os
|
|
|
|
|
5 |
import streamlit as st
|
6 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
-
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
|
8 |
-
from langchain.chains.summarize import load_summarize_chain
|
9 |
from transformers import pipeline
|
10 |
-
import
|
11 |
-
import
|
12 |
-
|
13 |
|
14 |
# Load model directly
|
15 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
@@ -17,7 +12,7 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
17 |
tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
18 |
base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
19 |
|
20 |
-
#
|
21 |
def file_preprocessing(file):
|
22 |
loader = PyPDFLoader(file)
|
23 |
pages = loader.load_and_split()
|
@@ -25,60 +20,37 @@ def file_preprocessing(file):
|
|
25 |
texts = text_splitter.split_documents(pages)
|
26 |
final_texts = ""
|
27 |
for text in texts:
|
28 |
-
|
29 |
-
final_texts = final_texts + text.page_content
|
30 |
return final_texts
|
31 |
|
32 |
-
#
|
33 |
-
def
|
34 |
-
pipe_sum = pipeline(
|
35 |
-
'summarization',
|
36 |
-
model = base_model,
|
37 |
-
tokenizer = tokenizer,
|
38 |
-
max_length = 500,
|
39 |
-
min_length = 50
|
40 |
-
)
|
41 |
-
input_text = file_preprocessing(filepath)
|
42 |
-
result = pipe_sum(input_text)
|
43 |
-
result = result[0]['summary_text']
|
44 |
-
return result
|
45 |
-
def llm_pipleline1(ans):
|
46 |
pipe_sum = pipeline(
|
47 |
'summarization',
|
48 |
-
model
|
49 |
-
tokenizer
|
50 |
-
max_length
|
51 |
-
min_length
|
52 |
)
|
53 |
-
input_text =""+ ans
|
54 |
result = pipe_sum(input_text)
|
55 |
-
|
56 |
-
return result
|
57 |
|
58 |
@st.cache_data
|
59 |
# Function to display the PDF file
|
60 |
-
def displayPDF(
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
# Embedding PDF in HTML
|
66 |
-
pdf_display = f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
|
67 |
-
|
68 |
-
# Displaying the file
|
69 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
70 |
|
71 |
-
|
72 |
-
#streamlit code
|
73 |
-
st.set_page_config(layout='wide')
|
74 |
-
|
75 |
def main():
|
76 |
st.title('Content Summarizer')
|
77 |
|
|
|
78 |
uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
|
79 |
-
|
80 |
if uploaded_file is not None:
|
81 |
-
if st.button("Summarize"):
|
82 |
col1, col2 = st.columns(2)
|
83 |
|
84 |
# Save the uploaded file to a temporary location
|
@@ -88,17 +60,17 @@ def main():
|
|
88 |
|
89 |
with col1:
|
90 |
st.info("Uploaded PDF File")
|
91 |
-
|
92 |
|
93 |
with col2:
|
94 |
-
st.info("Summarization
|
95 |
-
|
|
|
96 |
st.success(summary)
|
97 |
|
98 |
-
#
|
99 |
st.header("Summarize Your Text")
|
100 |
user_input = st.text_area("Enter your content here:", height=200)
|
101 |
-
|
102 |
if st.button("Summarize Text"):
|
103 |
if user_input.strip():
|
104 |
col1, col2 = st.columns(2)
|
@@ -108,8 +80,8 @@ def main():
|
|
108 |
st.write(user_input)
|
109 |
|
110 |
with col2:
|
111 |
-
st.info("Summarization
|
112 |
-
summary =
|
113 |
st.success(summary)
|
114 |
else:
|
115 |
st.warning("Please enter some content to summarize.")
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import base64
|
3 |
+
from tempfile import NamedTemporaryFile
|
4 |
import streamlit as st
|
|
|
|
|
|
|
5 |
from transformers import pipeline
|
6 |
+
from langchain.document_loaders import PyPDFLoader
|
7 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
8 |
|
9 |
# Load model directly
|
10 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
12 |
tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
13 |
base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
|
14 |
|
15 |
+
# File loader and processing
|
16 |
def file_preprocessing(file):
|
17 |
loader = PyPDFLoader(file)
|
18 |
pages = loader.load_and_split()
|
|
|
20 |
texts = text_splitter.split_documents(pages)
|
21 |
final_texts = ""
|
22 |
for text in texts:
|
23 |
+
final_texts += text.page_content
|
|
|
24 |
return final_texts
|
25 |
|
26 |
+
# LLM pipeline for summarization
|
27 |
+
def llm_pipeline(input_text):
    """Summarize ``input_text`` with the module-level model and tokenizer.

    Args:
        input_text: Plain text to summarize.

    Returns:
        The generated summary string, or an empty string when
        ``input_text`` is empty or contains only whitespace.
    """
    # Nothing to summarize (e.g. no text could be extracted from a PDF):
    # return early instead of asking the model to generate from no input.
    if not input_text or not input_text.strip():
        return ""

    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    # Ask the tokenizer to truncate over-long input to the model's maximum
    # input length rather than feeding an arbitrarily long sequence.
    # NOTE(review): text beyond that limit is not summarized; chunk the
    # input upstream if whole-document coverage is required.
    result = pipe_sum(input_text, truncation=True)
    return result[0]['summary_text']
|
|
|
37 |
|
38 |
@st.cache_data
def displayPDF(file_path):
    """Show the PDF stored at ``file_path`` inside the Streamlit page.

    The file is read in binary mode, base64-encoded, and embedded as a
    ``data:`` URI in an ``<iframe>`` that is emitted through ``st.markdown``.

    Args:
        file_path: Path of the PDF file to display.
    """
    with open(file_path, "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')

    # Embed the encoded document directly in the page markup.
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'

    # The iframe is raw HTML, so it is passed with unsafe_allow_html=True.
    st.markdown(pdf_display, unsafe_allow_html=True)
|
45 |
|
46 |
+
# Streamlit App
|
|
|
|
|
|
|
47 |
def main():
|
48 |
st.title('Content Summarizer')
|
49 |
|
50 |
+
# PDF Upload Section
|
51 |
uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
|
|
|
52 |
if uploaded_file is not None:
|
53 |
+
if st.button("Summarize PDF"):
|
54 |
col1, col2 = st.columns(2)
|
55 |
|
56 |
# Save the uploaded file to a temporary location
|
|
|
60 |
|
61 |
with col1:
|
62 |
st.info("Uploaded PDF File")
|
63 |
+
displayPDF(temp_filepath)
|
64 |
|
65 |
with col2:
|
66 |
+
st.info("Summarization")
|
67 |
+
input_text = file_preprocessing(temp_filepath)
|
68 |
+
summary = llm_pipeline(input_text)
|
69 |
st.success(summary)
|
70 |
|
71 |
+
# Text Input Section
|
72 |
st.header("Summarize Your Text")
|
73 |
user_input = st.text_area("Enter your content here:", height=200)
|
|
|
74 |
if st.button("Summarize Text"):
|
75 |
if user_input.strip():
|
76 |
col1, col2 = st.columns(2)
|
|
|
80 |
st.write(user_input)
|
81 |
|
82 |
with col2:
|
83 |
+
st.info("Summarization")
|
84 |
+
summary = llm_pipeline(user_input)
|
85 |
st.success(summary)
|
86 |
else:
|
87 |
st.warning("Please enter some content to summarize.")
|