"""Streamlit app that summarizes the keyword-matching sentences of a text file.

The user uploads a UTF-8 ``.txt`` file and enters comma-separated keywords.
Sentences containing at least one keyword (case-insensitive substring match)
are kept, joined, split into chunks of at most ``chunk_size`` characters, and
each chunk is summarized with the selected Hugging Face summarization model.
"""

import re
import textwrap

import streamlit as st
from transformers import pipeline

# Hugging Face model identifiers offered in the sidebar drop-down.
models = [
    "sshleifer/distilbart-cnn-12-6",
    "facebook/bart-large-cnn",
    "t5-base",
    "t5-large",
    "google/pegasus-newsroom",
]

# Sentence boundary: one or more spaces preceded by '.' or '?' and followed by
# an ASCII capital letter. The look-behind also requires that the character two
# positions before the punctuation is not an uppercase letter.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[^A-Z].[.?]) +(?=[A-Z])')


def _parse_keywords(raw):
    """Return the non-empty, lower-cased keywords of a comma-separated string.

    Blank entries (e.g. from a trailing comma) are dropped: an empty keyword
    is a substring of every sentence and would disable the filter entirely.
    """
    stripped = (part.strip() for part in raw.split(","))
    return [keyword.lower() for keyword in stripped if keyword]


def _filter_sentences(text, keywords):
    """Return the sentences of *text* that contain any keyword, space-joined.

    *keywords* must already be lower-cased; matching is done against the
    lower-cased sentence, so it is case-insensitive.
    """
    return ' '.join(
        sentence
        for sentence in _SENTENCE_BOUNDARY.split(text)
        if any(keyword in sentence.lower() for keyword in keywords)
    )


def _length_bounds(word_count, scale_percentage):
    """Return ``(min_length, max_length)`` for a chunk of *word_count* words.

    The bounds are ``scale_percentage`` -/+ 10 percentage points of the word
    count, with the percentages clamped to the range 1..100. ``min_length`` is
    at least 1 and ``max_length`` is always greater than ``min_length``, so
    very short chunks never yield a zero or inverted range.
    """
    min_percentage = max(scale_percentage - 10, 1)
    max_percentage = min(scale_percentage + 10, 100)
    min_length = max(int(word_count * min_percentage / 100), 1)
    max_length = max(int(word_count * max_percentage / 100), min_length + 1)
    return min_length, max_length


def _summarize(text, model_name, chunk_size, scale_percentage):
    """Summarize *text* chunk by chunk and return the space-joined summaries.

    The text is wrapped into chunks of at most *chunk_size* characters and
    each chunk is passed to a ``summarization`` pipeline built for
    *model_name*.
    """
    summarizer = pipeline('summarization', model=model_name)
    summaries = []
    for chunk in textwrap.wrap(text, chunk_size):
        # NOTE(review): the bounds are derived from a whitespace word count;
        # the pipeline presumably measures length in model tokens, so treat
        # these as an approximation -- confirm against the transformers docs.
        min_length, max_length = _length_bounds(
            len(chunk.split()), scale_percentage
        )
        result = summarizer(
            chunk,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
        )
        summaries.append(result[0]['summary_text'])
    return " ".join(summaries)


def main():
    """Render the widgets and, on request, show the summary."""
    st.title('Hugging Face BERT Summarizer')

    # Dropdown model selector
    model = st.sidebar.selectbox("Choose a model", models)

    uploaded_file = st.file_uploader("Choose a .txt file", type="txt")

    # Text input for keywords
    keywords = st.text_input("Enter keywords (comma-separated)")

    # Sidebar sliders for the summary scale and the chunk size (in characters)
    scale_percentage = st.sidebar.slider(
        'Scale %', min_value=1, max_value=100, value=50
    )
    chunk_size = st.sidebar.slider(
        'Chunk size', min_value=100, max_value=1000, value=500
    )

    if uploaded_file is None or not keywords:
        return

    keyword_list = _parse_keywords(keywords)
    if not keyword_list:
        st.warning("Please enter at least one non-empty keyword.")
        return

    try:
        # getvalue() returns the whole upload regardless of the buffer's
        # current read position, which matters because pressing the button
        # re-runs this script.
        user_input = uploaded_file.getvalue().decode('utf-8')
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return

    # Keep only the sentences that mention at least one keyword
    filtered_text = _filter_sentences(user_input, keyword_list)

    if st.button('Summarize'):
        if not filtered_text.strip():
            st.warning("No sentences matched the given keywords.")
            return
        summarized_text = _summarize(
            filtered_text, model, chunk_size, scale_percentage
        )
        st.text_area('Summarized Text', summarized_text, height=200)


# Called unconditionally, as in the original script: the app is rendered
# whenever this file is executed.
main()