blazingbunny committed
Commit 4b1ed8b · 1 Parent(s): 20be358

Update app.py

Files changed (1)
  1. app.py +13 -3
app.py CHANGED
@@ -1,6 +1,7 @@
 import streamlit as st
 from transformers import pipeline
 import textwrap
+import re

 st.title('Hugging Face BERT Summarizer')

@@ -12,18 +13,27 @@ model = st.sidebar.selectbox("Choose a model", models)

 uploaded_file = st.file_uploader("Choose a .txt file", type="txt")

+# Add text input for keywords
+keywords = st.text_input("Enter keywords (comma-separated)")
+
 # Add slider to the sidebar for the scale value
 scale_percentage = st.sidebar.slider('Scale %', min_value=1, max_value=100, value=50)

-if uploaded_file is not None:
+if uploaded_file is not None and keywords:
     user_input = uploaded_file.read().decode('utf-8')
+    keywords = [keyword.strip() for keyword in keywords.split(",")]
+
+    # Filter sentences based on keywords
+    sentences = re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', user_input)
+    filtered_sentences = [sentence for sentence in sentences if any(keyword.lower() in sentence.lower() for keyword in keywords)]
+    filtered_text = ' '.join(filtered_sentences)

     if st.button('Summarize'):
         summarizer = pipeline('summarization', model=model)
         summarized_text = ""

-        # Split the text into chunks of approximately 500 words each
-        chunks = textwrap.wrap(user_input, 500)
+        # Split the filtered text into chunks of approximately 500 words each
+        chunks = textwrap.wrap(filtered_text, 500)

         # Summarize each chunk
         for chunk in chunks:
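
The sentence splitter and keyword filter added in this commit can be exercised on their own. A minimal standalone sketch (the sample text and keywords below are made up for illustration; in the app they come from the uploaded file and the text input):

import re

# Illustrative inputs only
user_input = "Transformers are neural networks. BERT is one example. Dr. Smith uses it daily."
keywords = [keyword.strip() for keyword in "bert, smith".split(",")]

# Split on spaces that follow sentence-ending punctuation and precede a capital letter;
# the [^A-Z]. lookbehind keeps abbreviations like "Dr." from triggering a split.
sentences = re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', user_input)
filtered = [s for s in sentences if any(k.lower() in s.lower() for k in keywords)]
print(filtered)  # ['BERT is one example.', 'Dr. Smith uses it daily.']

Note that the filter is a plain substring match, so a keyword like "bert" also matches words that merely contain it (e.g. "Roberta").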
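The hunk ends at the loop header, so the loop body is not shown here. Two caveats apply to the chunking step: textwrap.wrap(filtered_text, 500) limits chunks to roughly 500 characters, not 500 words as the comment says, and the pipeline's max_length is measured in tokens. Below is a hedged, standalone sketch of how the body might apply scale_percentage; the model name and the word-count length heuristic are assumptions, not shown in this commit:

from transformers import pipeline

# Assumed standalone reconstruction; model name and length heuristic are illustrative.
summarizer = pipeline('summarization', model='facebook/bart-large-cnn')
chunks = ["First filtered text chunk ...", "Second filtered text chunk ..."]
scale_percentage = 50
summarized_text = ""
for chunk in chunks:
    # max_length counts tokens; scaling the word count is only a rough proxy.
    max_len = max(10, int(len(chunk.split()) * scale_percentage / 100))
    result = summarizer(chunk, max_length=max_len, min_length=5, do_sample=False)
    summarized_text += result[0]['summary_text'] + " "
print(summarized_text.strip())  # in the Streamlit app this would be st.write(...)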