Files changed (1) hide show
  1. app.py +27 -55
app.py CHANGED
@@ -1,15 +1,10 @@
1
- from dotenv import load_dotenv
2
- load_dotenv()
3
- from tempfile import NamedTemporaryFile
4
  import os
 
 
5
  import streamlit as st
6
- from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain.document_loaders import PyPDFLoader,DirectoryLoader
8
- from langchain.chains.summarize import load_summarize_chain
9
  from transformers import pipeline
10
- import torch
11
- import base64
12
-
13
 
14
  # Load model directly
15
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@@ -17,7 +12,7 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
17
  tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
18
  base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
19
 
20
- #file loader and processing
21
  def file_preprocessing(file):
22
  loader = PyPDFLoader(file)
23
  pages = loader.load_and_split()
@@ -25,60 +20,37 @@ def file_preprocessing(file):
25
  texts = text_splitter.split_documents(pages)
26
  final_texts = ""
27
  for text in texts:
28
- print(text)
29
- final_texts = final_texts + text.page_content
30
  return final_texts
31
 
32
- #lm pipeline
33
- def llm_pipleline(filepath):
34
- pipe_sum = pipeline(
35
- 'summarization',
36
- model = base_model,
37
- tokenizer = tokenizer,
38
- max_length = 500,
39
- min_length = 50
40
- )
41
- input_text = file_preprocessing(filepath)
42
- result = pipe_sum(input_text)
43
- result = result[0]['summary_text']
44
- return result
45
- def llm_pipleline1(ans):
46
  pipe_sum = pipeline(
47
  'summarization',
48
- model = base_model,
49
- tokenizer = tokenizer,
50
- max_length = 500,
51
- min_length = 50
52
  )
53
- input_text =""+ ans
54
  result = pipe_sum(input_text)
55
- result = result[0]['summary_text']
56
- return result
57
 
58
  @st.cache_data
59
  # Function to display the PDF file
60
- def displayPDF(file):
61
- # Opening file from file path
62
- with open(file, "rb") as f:
63
- base_pdf = base64.b64encode(f.read()).decode('utf-8') # Corrected function name and variable
64
-
65
- # Embedding PDF in HTML
66
- pdf_display = f'<iframe src="data:application/pdf;base64,{base_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
67
-
68
- # Displaying the file
69
  st.markdown(pdf_display, unsafe_allow_html=True)
70
 
71
-
72
- #streamlit code
73
- st.set_page_config(layout='wide')
74
-
75
  def main():
76
  st.title('Content Summarizer')
77
 
 
78
  uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
79
-
80
  if uploaded_file is not None:
81
- if st.button("Summarize"):
82
  col1, col2 = st.columns(2)
83
 
84
  # Save the uploaded file to a temporary location
@@ -88,17 +60,17 @@ def main():
88
 
89
  with col1:
90
  st.info("Uploaded PDF File")
91
- pdf_viewer = displayPDF(temp_filepath)
92
 
93
  with col2:
94
- st.info("Summarization is below")
95
- summary = llm_pipleline(temp_filepath)
 
96
  st.success(summary)
97
 
98
- # New Section for Text Input Summarization
99
  st.header("Summarize Your Text")
100
  user_input = st.text_area("Enter your content here:", height=200)
101
-
102
  if st.button("Summarize Text"):
103
  if user_input.strip():
104
  col1, col2 = st.columns(2)
@@ -108,8 +80,8 @@ def main():
108
  st.write(user_input)
109
 
110
  with col2:
111
- st.info("Summarization is below")
112
- summary = llm_pipleline1(user_input)
113
  st.success(summary)
114
  else:
115
  st.warning("Please enter some content to summarize.")
 
 
 
 
1
  import os
2
+ import base64
3
+ from tempfile import NamedTemporaryFile
4
  import streamlit as st
 
 
 
5
  from transformers import pipeline
6
+ from langchain.document_loaders import PyPDFLoader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
 
8
 
9
  # Load model directly
10
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
12
  tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
13
  base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
14
 
15
+ # File loader and processing
16
  def file_preprocessing(file):
17
  loader = PyPDFLoader(file)
18
  pages = loader.load_and_split()
 
20
  texts = text_splitter.split_documents(pages)
21
  final_texts = ""
22
  for text in texts:
23
+ final_texts += text.page_content
 
24
  return final_texts
25
 
26
+ # LLM pipeline for summarization
27
def llm_pipeline(input_text):
    """Summarize ``input_text`` with the module-level model and tokenizer.

    A Hugging Face ``'summarization'`` pipeline is created around the
    module-level ``base_model`` and ``tokenizer``, configured with a
    ``max_length`` of 500 and a ``min_length`` of 50, and then applied to
    ``input_text``.

    Args:
        input_text: The text to summarize.

    Returns:
        The ``'summary_text'`` entry of the first result produced by the
        pipeline.
    """
    # NOTE(review): the pipeline object is constructed on every call.
    summarizer_options = {
        "model": base_model,
        "tokenizer": tokenizer,
        "max_length": 500,
        "min_length": 50,
    }
    summarizer = pipeline('summarization', **summarizer_options)

    outputs = summarizer(input_text)
    first_output = outputs[0]
    return first_output['summary_text']
 
37
 
38
  @st.cache_data
39
  # Function to display the PDF file
40
def displayPDF(file_path):
    """Embed the PDF stored at ``file_path`` in the Streamlit page.

    The file is read in binary mode, base64-encoded, and placed in an
    ``<iframe>`` through a ``data:application/pdf`` URI. The resulting HTML
    is emitted with ``st.markdown``.

    Args:
        file_path: Path of the PDF file to read and display.
    """
    with open(file_path, "rb") as pdf_file:
        raw_bytes = pdf_file.read()

    # The data URI needs text, so decode the base64 bytes to a str.
    base64_pdf = base64.b64encode(raw_bytes).decode('utf-8')
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'

    # The markup is passed with unsafe_allow_html=True so the iframe is
    # rendered as HTML.
    st.markdown(pdf_display, unsafe_allow_html=True)
45
 
46
+ # Streamlit App
 
 
 
47
  def main():
48
  st.title('Content Summarizer')
49
 
50
+ # PDF Upload Section
51
  uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
 
52
  if uploaded_file is not None:
53
+ if st.button("Summarize PDF"):
54
  col1, col2 = st.columns(2)
55
 
56
  # Save the uploaded file to a temporary location
 
60
 
61
  with col1:
62
  st.info("Uploaded PDF File")
63
+ displayPDF(temp_filepath)
64
 
65
  with col2:
66
+ st.info("Summarization")
67
+ input_text = file_preprocessing(temp_filepath)
68
+ summary = llm_pipeline(input_text)
69
  st.success(summary)
70
 
71
+ # Text Input Section
72
  st.header("Summarize Your Text")
73
  user_input = st.text_area("Enter your content here:", height=200)
 
74
  if st.button("Summarize Text"):
75
  if user_input.strip():
76
  col1, col2 = st.columns(2)
 
80
  st.write(user_input)
81
 
82
  with col2:
83
+ st.info("Summarization")
84
+ summary = llm_pipeline(user_input)
85
  st.success(summary)
86
  else:
87
  st.warning("Please enter some content to summarize.")