File size: 1,656 Bytes
fc672dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pandas as pd
import streamlit as st
from keybert import KeyBERT
from samples import texts

@st.cache(
    allow_output_mutation=True,
    suppress_st_warning=True,
    show_spinner=True,
)
def load_model():
    """Build the KeyBERT keyword extractor used by the app.

    The function is wrapped in ``st.cache`` so the constructed object is
    reused instead of being rebuilt each time the script runs.

    Returns:
        A ``KeyBERT`` instance created with the
        ``sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1`` model.
    """
    return KeyBERT("sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1")
  
# Obtain the keyword-extraction model (cached by load_model's decorator).
model = load_model()

# Main page: free-text area the keywords are extracted from.
placeholder = st.empty()
text_input = placeholder.text_area("Paste or write text", height=300)

# Sidebar: how many keywords to return and the allowed n-gram sizes.
top_n = st.sidebar.slider("Select a number of keywords", 1, 10, 5, 1)
# `number_input` lives directly on `st.sidebar`; `st.sidebar.slider` is a
# method and has no `number_input` attribute.
min_ngram = st.sidebar.number_input(
    "Minimum number of words in each keyword", 1, 5, 1, 1
)
max_ngram = st.sidebar.number_input(
    "Maximum number of words in each keyword",
    min_ngram,
    5,
    # The default must not fall below the widget's minimum, which happens
    # with a fixed default of 3 once min_ngram is raised to 4 or 5.
    max(min_ngram, 3),
    step=1,
)
st.sidebar.code(f"ngram_range=({min_ngram}, {max_ngram})")

# Keyword arguments later forwarded to `model.extract_keywords(**params)`.
params = {
    "docs": text_input,
    "top_n": top_n,
    "keyphrase_ngram_range": (min_ngram, max_ngram),
    "stop_words": 'english',
}

# Optional diversification: the user may pick one of two strategies, and the
# matching flag plus its tuning value are added to the extraction parameters.
add_diversity = st.sidebar.checkbox("Adjust diversity of keywords")

if add_diversity:
    # All statements in this branch share one indentation level; the mixed
    # 2/4-space indentation previously made the file fail to compile.
    method = st.sidebar.selectbox(
        "Select a method", ("Max Sum Similarity", "Maximal Marginal Relevance")
    )
    if method == "Max Sum Similarity":
        nr_candidates = st.sidebar.slider("nr_candidates", 20, 50, 20, 2)
        params["use_maxsum"] = True
        params["nr_candidates"] = nr_candidates

    elif method == "Maximal Marginal Relevance":
        diversity = st.sidebar.slider("diversity", 0.1, 1.0, 0.6, 0.01)
        params["use_mmr"] = True
        params["diversity"] = diversity

# Run the extraction only when the user has entered some text; a blank text
# area yields no keywords, so the model call is skipped on those reruns.
keywords = model.extract_keywords(**params) if text_input.strip() else []

# Render the (keyword, relevance) pairs as a table when any were found.
if keywords:
    st.info("Extracted keywords")
    keywords = pd.DataFrame(keywords, columns=["keyword", "relevance"])
    st.table(keywords)