Wootang01 committed on
Commit
fc672dd
·
1 Parent(s): 4df0899

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from keybert import KeyBERT
4
+ from samples import texts
5
+
6
@st.cache(allow_output_mutation=True, suppress_st_warning=True, show_spinner=True)
def load_model():
    """Create the KeyBERT extractor used by the app.

    The instance is built from the
    ``sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1`` model.
    The function is wrapped in ``st.cache`` so that Streamlit can reuse the
    returned object instead of calling the constructor again.

    Returns:
        The constructed ``KeyBERT`` instance.
    """
    return KeyBERT("sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1")
10
+
11
# Obtain the keyword-extraction model (load_model is wrapped in st.cache).
model = load_model()

# Main text area, rendered inside an st.empty() placeholder container.
placeholder = st.empty()
text_input = placeholder.text_area("Paste or write text", height=300)

# --- Sidebar controls for the extraction parameters -------------------------
top_n = st.sidebar.slider("Select a number of keywords", 1, 10, 5, 1)

# `slider` is a widget method, not a namespace, so `number_input` has to be
# called on `st.sidebar` directly (`st.sidebar.slider.number_input` raises
# AttributeError).
min_ngram = st.sidebar.number_input(
    "Minimum number of words in each keyword", 1, 5, 1, 1
)
# The lower bound of this input follows `min_ngram`; the default value is
# clamped with max() so it never falls below that bound when the user picks
# a minimum greater than 3.
max_ngram = st.sidebar.number_input(
    "Maximum number of words in each keyword",
    min_ngram,
    5,
    max(min_ngram, 3),
    step=1,
)
# Echo the selected range back to the user.
st.sidebar.code(f"ngram_range=({min_ngram}, {max_ngram})")

# Keyword arguments forwarded to model.extract_keywords(); optional
# diversity settings are added below.
params = {
    "docs": text_input,
    "top_n": top_n,
    "keyphrase_ngram_range": (min_ngram, max_ngram),
    "stop_words": 'english',
}

add_diversity = st.sidebar.checkbox("Adjust diversity of keywords")

if add_diversity:
    method = st.sidebar.selectbox(
        "Select a method", ("Max Sum Similarity", "Maximal Marginal Relevance")
    )
    if method == "Max Sum Similarity":
        nr_candidates = st.sidebar.slider("nr_candidates", 20, 50, 20, 2)
        params["use_maxsum"] = True
        params["nr_candidates"] = nr_candidates

    elif method == "Maximal Marginal Relevance":
        diversity = st.sidebar.slider("diversity", 0.1, 1.0, 0.6, 0.01)
        params["use_mmr"] = True
        params["diversity"] = diversity

keywords = model.extract_keywords(**params)

# Only render the results table when at least one keyword came back.
if keywords:
    st.info("Extracted keywords")
    keywords = pd.DataFrame(keywords, columns=["keyword", "relevance"])
    st.table(keywords)
43
+