blazingbunny committed
Commit 4b1ed8b · 1 Parent(s): 20be358

Update app.py

Files changed (1)
  1. app.py +13 -3
app.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 from transformers import pipeline
 import textwrap
+import re

 st.title('Hugging Face BERT Summarizer')

@@ -12,18 +13,27 @@ model = st.sidebar.selectbox("Choose a model", models)

 uploaded_file = st.file_uploader("Choose a .txt file", type="txt")

+# Add text input for keywords
+keywords = st.text_input("Enter keywords (comma-separated)")
+
 # Add slider to the sidebar for the scale value
 scale_percentage = st.sidebar.slider('Scale %', min_value=1, max_value=100, value=50)

-if uploaded_file is not None:
+if uploaded_file is not None and keywords:
     user_input = uploaded_file.read().decode('utf-8')
+    keywords = [keyword.strip() for keyword in keywords.split(",")]
+
+    # Filter sentences based on keywords
+    sentences = re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', user_input)
+    filtered_sentences = [sentence for sentence in sentences if any(keyword.lower() in sentence.lower() for keyword in keywords)]
+    filtered_text = ' '.join(filtered_sentences)

     if st.button('Summarize'):
         summarizer = pipeline('summarization', model=model)
         summarized_text = ""

-        # Split the text into chunks of approximately 500 words each
-        chunks = textwrap.wrap(user_input, 500)
+        # Split the filtered text into chunks of approximately 500 words each
+        chunks = textwrap.wrap(filtered_text, 500)

         # Summarize each chunk
         for chunk in chunks:
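
The sentence splitter and keyword filter added in this commit can be exercised on their own. A minimal standalone sketch (the sample text and keywords below are made up for illustration; in the app they come from the uploaded file and the text input):

import re

# Illustrative inputs only
user_input = "Transformers are neural networks. BERT is one example. Dr. Smith uses it daily."
keywords = [keyword.strip() for keyword in "bert, smith".split(",")]

# Split on spaces that follow sentence-ending punctuation and precede a capital letter;
# the [^A-Z]. lookbehind keeps abbreviations like "Dr." from triggering a split.
sentences = re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', user_input)
filtered = [s for s in sentences if any(k.lower() in s.lower() for k in keywords)]
print(filtered)  # ['BERT is one example.', 'Dr. Smith uses it daily.']

Note that the filter is a plain substring match, so a keyword like "bert" also matches words that merely contain it (e.g. "Roberta").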
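The hunk ends at the loop header, so the loop body is not shown here. Two caveats apply to the chunking step: textwrap.wrap(filtered_text, 500) limits chunks to roughly 500 characters, not 500 words as the comment says, and the pipeline's max_length is measured in tokens. Below is a hedged, standalone sketch of how the body might apply scale_percentage; the model name and the word-count length heuristic are assumptions, not shown in this commit:

from transformers import pipeline

# Assumed standalone reconstruction; model name and length heuristic are illustrative.
summarizer = pipeline('summarization', model='facebook/bart-large-cnn')
chunks = ["First filtered text chunk ...", "Second filtered text chunk ..."]
scale_percentage = 50
summarized_text = ""
for chunk in chunks:
    # max_length counts tokens; scaling the word count is only a rough proxy.
    max_len = max(10, int(len(chunk.split()) * scale_percentage / 100))
    result = summarizer(chunk, max_length=max_len, min_length=5, do_sample=False)
    summarized_text += result[0]['summary_text'] + " "
print(summarized_text.strip())  # in the Streamlit app this would be st.write(...)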