Files changed (1) hide show
  1. app.py +115 -0
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
+ import streamlit as st
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.document_loaders import PyPDFLoader,DirectoryLoader
7
+ from langchain.chains.summarize import load_summarize_chain
8
+ from transformers import pipeline
9
+ import torch
10
+ import base64
11
+
12
+
13
+ # Load model directly
14
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
15
+
16
# Hugging Face checkpoint shared by the tokenizer and the seq2seq model.
_CHECKPOINT = "MBZUAI/LaMini-Flan-T5-248M"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
base_model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)
18
+
19
+ #file loader and processing
20
def file_preprocessing(file):
    """Load a PDF and return its text as a single string.

    The PDF is loaded with ``PyPDFLoader``, split again with a
    ``RecursiveCharacterTextSplitter`` configured with ``chunk_size=300``
    and ``chunk_overlap=50``, and the ``page_content`` of every resulting
    chunk is concatenated in order with no separator.

    Args:
        file: Path of the PDF file, passed straight to ``PyPDFLoader``.

    Returns:
        The concatenated chunk text; an empty string when the splitter
        yields no chunks.
    """
    loader = PyPDFLoader(file)
    pages = loader.load_and_split()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = text_splitter.split_documents(pages)
    # Build the result in one pass; nothing is printed per chunk because
    # this runs on every summarize request.
    return "".join(chunk.page_content for chunk in chunks)
30
+
31
+ #lm pipeline
32
def _summarize(text):
    """Summarize *text* with the module-level model and tokenizer.

    Builds a ``'summarization'`` pipeline around ``base_model`` and
    ``tokenizer`` (``max_length=500``, ``min_length=50``), runs it on
    *text*, and returns the ``'summary_text'`` of the first result.
    """
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    result = pipe_sum(text)
    return result[0]['summary_text']


def llm_pipleline(filepath):
    """Summarize the PDF stored at *filepath*.

    Args:
        filepath: Path of the PDF; its text is extracted with
            ``file_preprocessing``.

    Returns:
        The summary text produced by the summarization pipeline.
    """
    return _summarize(file_preprocessing(filepath))


def llm_pipleline1(ans):
    """Summarize text supplied directly by the user.

    Args:
        ans: The text to summarize.

    Returns:
        The summary text produced by the summarization pipeline.
    """
    return _summarize(ans)
56
+
57
@st.cache_data
def displayPDF(file):
    """Render the PDF at *file* inline in the Streamlit page.

    The file is read in binary mode, base64-encoded, and embedded as a
    ``data:application/pdf`` URI inside an ``<iframe>`` that is emitted
    through ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        file: Path of the PDF file to display.

    Returns:
        None. The function is called for its rendering side effect.
    """
    # NOTE(review): st.cache_data presumably keys on the `file` argument, so
    # a different PDF saved under the same path may show stale content --
    # confirm this is acceptable.
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode('utf-8')

    # Embed the PDF in HTML as a data URI.
    pdf_display = (
        f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
        'width="100%" height="600" type="application/pdf"></iframe>'
    )

    st.markdown(pdf_display, unsafe_allow_html=True)
69
+
70
# Streamlit page setup: request the wide layout.
st.set_page_config(layout="wide")
72
+
73
def _save_upload(data):
    """Write uploaded PDF bytes into the temp directory and return the path.

    The file name is derived from the SHA-256 digest of *data* rather than
    from the client-supplied name, so the name cannot escape the temp
    directory and identical uploads reuse the same file.
    """
    import hashlib
    import os
    import tempfile

    digest = hashlib.sha256(data).hexdigest()
    filepath = os.path.join(tempfile.gettempdir(), f"upload_{digest}.pdf")
    with open(filepath, 'wb') as temp_file:
        temp_file.write(data)
    return filepath


def main():
    """Run the Streamlit UI: summarize an uploaded PDF or pasted text.

    PDF flow: once a file is uploaded and "summarize" is clicked, the PDF is
    saved to disk, shown in the left column, and its summary is shown in the
    right column.

    Text flow: the text typed into the text area is echoed in the left
    column and summarized in the right column when "Enter" is clicked.
    """
    st.title('Content sumerizer')

    uploaded_file = st.file_uploader("upload your pdf file", type=['pdf'])

    if uploaded_file is not None:
        if st.button("summarize"):
            col1, col2 = st.columns(2)
            # getvalue() returns the full content regardless of the current
            # read position of the uploaded buffer.
            filepath = _save_upload(uploaded_file.getvalue())
            with col1:
                st.info("uploaded PDF File")
                displayPDF(filepath)
            with col2:
                st.info("Summarization is below")
                summary = llm_pipleline(filepath)
                st.success(summary)
    else:
        # Shown on the page; print() would only reach the server console.
        st.info("enter a valid pdf file")

    # A text area replaces input(), which would block on the server's stdin.
    # The "Enter" button is not nested inside another button: a button is
    # only True on the rerun caused by its own click, so a nested button's
    # branch can never run.
    ans = st.text_area("enter your content")
    if st.button("Enter"):
        if not ans.strip():
            st.warning("enter some text to summarize")
        else:
            col1, col2 = st.columns(2)
            with col1:
                st.info("what you have entered")
                st.write(ans)
            with col2:
                st.info("Summarization is below")
                summary1 = llm_pipleline1(ans)
                st.success(summary1)


if __name__ == '__main__':
    main()