# Wootang01's picture
# Update app.py
# 8161c74
import pandas as pd
import streamlit as st
from keybert import KeyBERT
@st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=True)
def load_model():
    """Create the KeyBERT keyword extractor used by the app.

    The function is wrapped in ``st.cache`` so that its result can be reused
    instead of being rebuilt each time it is called.

    Returns:
        A ``KeyBERT`` instance constructed with the sentence-transformers
        checkpoint named below.
    """
    # NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour
    # of ``st.cache_resource`` -- confirm the pinned Streamlit version before
    # migrating the decorator.
    checkpoint = "sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1"
    return KeyBERT(checkpoint)
# Obtain the keyword extractor (load_model is wrapped in a caching decorator).
model = load_model()

# Main page: a free-text area holding the document to analyse.
placeholder = st.empty()
text_input = placeholder.text_area("Paste or write text", height=300)

# Sidebar: how many keywords to return and how long each keyphrase may be.
top_n = st.sidebar.slider("Select a number of keywords", 1, 10, 5, 1)
min_ngram = st.sidebar.number_input("Minimum number of words in each keyword", 1, 5, 1, 1)
# The maximum may never be smaller than the chosen minimum, so min_ngram is
# used as this widget's lower bound. The default must respect that bound too:
# a fixed default of 3 falls below min_value once the user picks a minimum of
# 4 or 5, which Streamlit rejects with an exception. Clamp the default upwards.
max_ngram = st.sidebar.number_input(
    "Maximum number of words in each keyword",
    min_ngram,
    5,
    max(min_ngram, 3),
    step=1,
)
# Echo the resulting range so the user can see what is passed to the model.
st.sidebar.code(f"ngram_range=({min_ngram}, {max_ngram})")

# Keyword arguments later unpacked into model.extract_keywords(**params).
params = {
    "docs": text_input,
    "top_n": top_n,
    "keyphrase_ngram_range": (min_ngram, max_ngram),
    "stop_words": 'english',
}
# Optional sidebar section: let the user pick a diversification strategy and
# add the matching keyword arguments to ``params``.
add_diversity = st.sidebar.checkbox("Adjust diversity of keywords")
if add_diversity:
    method = st.sidebar.selectbox(
        "Select a method",
        ("Max Sum Similarity", "Maximal Marginal Relevance"),
    )
    if method == "Max Sum Similarity":
        # Size of the candidate pool, chosen by the user (20-50 in steps of 2).
        nr_candidates = st.sidebar.slider("nr_candidates", 20, 50, 20, 2)
        params.update(use_maxsum=True, nr_candidates=nr_candidates)
    elif method == "Maximal Marginal Relevance":
        # Diversity setting chosen by the user (0.1-1.0 in steps of 0.01).
        diversity = st.sidebar.slider("diversity", 0.1, 1.0, 0.6, 0.01)
        params.update(use_mmr=True, diversity=diversity)
# Run the extraction with the options collected in ``params``. When the text
# area is blank there is nothing to extract, so skip the model call entirely
# and fall through with an empty result.
keywords = model.extract_keywords(**params) if text_input.strip() else []

# Only render the results section when at least one keyword came back.
if keywords:
    st.info("Extracted keywords")
    # Each result row is unpacked into the two display columns below.
    keywords = pd.DataFrame(keywords, columns=["keyword", "relevance"])
    st.table(keywords)