Spaces:

mishrasahil934
/

Team_skulk

Running

App Files Files Community

create app.py

by mishrasahil934 - opened 25 days ago

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+115

-0

Files changed (1) hide show

app.py +115 -0

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+from dotenv import load_dotenv
+load_dotenv()
+import streamlit as st
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import PyPDFLoader,DirectoryLoader
+from langchain.chains.summarize import load_summarize_chain
+from transformers import pipeline
+import torch
+import base64
+# Load model directly
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+tokenizer = AutoTokenizer.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
+base_model = AutoModelForSeq2SeqLM.from_pretrained("MBZUAI/LaMini-Flan-T5-248M")
+#file loader and processing
+def file_preprocessing(file):
+    loader = PyPDFLoader(file)
+    pages = loader.load_and_split()
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
+    texts = text_splitter.split_documents(pages)
+    final_texts = ""
+    for text in texts:
+        print(text)
+        final_texts = final_texts + text.page_content
+    return final_texts
+#lm pipeline
+def llm_pipleline(filepath):
+    pipe_sum = pipeline(
+        'summarization',
+        model = base_model,
+        tokenizer = tokenizer,
+        max_length = 500,
+        min_length = 50
+    )
+    input_text = file_preprocessing(filepath)
+    result = pipe_sum(input_text)
+    result = result[0]['summary_text']
+    return result
+def llm_pipleline1(ans):
+    pipe_sum = pipeline(
+        'summarization',
+        model = base_model,
+        tokenizer = tokenizer,
+        max_length = 500,
+        min_length = 50
+    )
+    input_text =""+ ans
+    result = pipe_sum(input_text)
+    result = result[0]['summary_text']
+    return result
+@st.cache_data
+#function to display the pdf file
+def displayPDF(file):
+    #opening file from file path
+    with open(file, "rb") as f:
+        base_pdf = base64.bb64encode(f.read()).decode('utf-8')
+#embedding pdf in html
+pdf_display = F'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
+#displaying file
+st.markdown(pdf_display, unsafe_allow_html=True)
+# Streamlit page setup: use the wide layout for the whole app.
+# NOTE(review): Streamlit expects set_page_config to run before any other
+# Streamlit command that renders output -- confirm nothing above this line does.
+st.set_page_config(layout='wide')
+def main():
+    st.title('Content sumerizer')
+    uploaded_file = st.file_uploader("upload your pdf file", type=['pdf'])
+    if uploaded_file is not None:
+        if st.button("summarize"):
+            col1,col2 = st.columns(2)
+            filepath = "home/user/app"+uploaded_file.name
+            with open(filepath, 'wb') as temp_file:
+                temp_file.write(uploaded_file.read())
+            with col1:
+                st.info("uploaded PDF File")
+                pdf_viewer = displayPDF(filepath)
+            with col2:
+                st.info("Summarization is below")
+                summary = llm_pipleline(filepath)
+                st.success(summary)
+    else :
+        print("enter a valid pdf file")
+    if st.button("text"):
+        ans = input("enter your content")
+        if st.button("Enter"):
+            col1,col2 = st.columns(2)
+            with col1:
+                st.info("what you have entered")
+                print(ans)
+            with col2:
+                st.info("Summarization is below")
+                summary1=llm_pipleline1(ans)
+                st.success(summary1)
+# Script entry point: build the page only when this file is run as the main
+# module, not when it is imported.
+if __name__ == '__main__':
+    main()