"""Streamlit demo app: extract keywords from user-supplied text with KeyBERT."""

import pandas as pd
import streamlit as st
from keybert import KeyBERT

from samples import texts


# NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_resource`` -- confirm the pinned Streamlit version before switching.
@st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=True)
def load_model():
    """Build the KeyBERT model once and let Streamlit cache it across reruns.

    Returns:
        A ``KeyBERT`` instance backed by the
        ``sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1`` model.
    """
    model = KeyBERT("sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1")
    return model


model = load_model()

# --- Main input area -------------------------------------------------------
placeholder = st.empty()
text_input = placeholder.text_area("Paste or write text", height=300)

# --- Sidebar controls ------------------------------------------------------
top_n = st.sidebar.slider("Select a number of keywords", 1, 10, 5, 1)

# ``number_input`` lives directly on ``st.sidebar``; the original
# ``st.sidebar.slider.number_input`` raised AttributeError at runtime.
min_ngram = st.sidebar.number_input(
    "Minimum number of words in each keyword",
    min_value=1,
    max_value=5,
    value=1,
    step=1,
)
# The default must never fall below ``min_value``; otherwise Streamlit rejects
# the widget when the user picks a minimum greater than 3.
max_ngram = st.sidebar.number_input(
    "Maximum number of words in each keyword",
    min_value=min_ngram,
    max_value=5,
    value=max(min_ngram, 3),
    step=1,
)
st.sidebar.code(f"ngram_range=({min_ngram}, {max_ngram})")

# Keyword arguments forwarded verbatim to ``KeyBERT.extract_keywords``.
params = {
    "docs": text_input,
    "top_n": top_n,
    "keyphrase_ngram_range": (min_ngram, max_ngram),
    "stop_words": 'english',
}

# --- Optional diversification strategy ------------------------------------
add_diversity = st.sidebar.checkbox("Adjust diversity of keywords")

if add_diversity:
    method = st.sidebar.selectbox(
        "Select a method",
        ("Max Sum Similarity", "Maximal Marginal Relevance"),
    )
    if method == "Max Sum Similarity":
        nr_candidates = st.sidebar.slider("nr_candidates", 20, 50, 20, 2)
        params["use_maxsum"] = True
        params["nr_candidates"] = nr_candidates
    elif method == "Maximal Marginal Relevance":
        diversity = st.sidebar.slider("diversity", 0.1, 1.0, 0.6, 0.01)
        params["use_mmr"] = True
        params["diversity"] = diversity

# --- Extraction and display ------------------------------------------------
# Skip the model call when the text area is blank: there is nothing to extract.
if text_input.strip():
    keywords = model.extract_keywords(**params)
else:
    keywords = []

if keywords:
    st.info("Extracted keywords")
    keywords = pd.DataFrame(keywords, columns=["keyword", "relevance"])
    st.table(keywords)