miesnerjacob committed on
Commit
4b75840
·
1 Parent(s): ef5720a

Add application files

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
emotion_detection.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
2
+ from transformers_interpret import SequenceClassificationExplainer
3
+ import torch
4
+ import pandas as pd
5
+
6
+
7
class EmotionDetection():
    """Emotion classifier with an attached attribution explainer.

    Wraps the ``cardiffnlp/twitter-roberta-base-emotion`` checkpoint and a
    ``SequenceClassificationExplainer`` built on the same model/tokenizer.
    """

    def __init__(self, chunksize=512):
        """Load the tokenizer, model and explainer.

        Parameters
        ----------
        chunksize : int
            Maximum number of tokens ``classify`` sends to the model; longer
            inputs are truncated to this length.
        """
        hub_location = 'cardiffnlp/twitter-roberta-base-emotion'
        self.chunksize = chunksize
        self.tokenizer = AutoTokenizer.from_pretrained(hub_location)
        self.model = AutoModelForSequenceClassification.from_pretrained(hub_location)
        self.explainer = SequenceClassificationExplainer(self.model, self.tokenizer)

    def justify(self, text):
        """Compute attributions for ``text`` and return their visualization.

        Returns whatever ``explainer.visualize`` returns (the app calls
        ``_repr_html_()`` on it).
        """
        # The explainer must be called first; visualize() renders the
        # attributions produced by the most recent call.
        self.explainer(text)
        # NOTE(review): the path argument presumably also writes
        # "example.html" to the working directory on every call - confirm
        # against transformers_interpret before relying on or removing it.
        return self.explainer.visualize("example.html")

    def classify(self, text):
        """Return the predicted probability of each emotion label.

        Parameters
        ----------
        text : str
            Text to classify; truncated to ``self.chunksize`` tokens.

        Returns
        -------
        pandas.Series
            Probabilities indexed by label name, named
            ``'Predicted Probability'``.
        """
        tokens = self.tokenizer.encode_plus(
            text,
            add_special_tokens=False,
            truncation=True,
            max_length=self.chunksize,
            return_tensors='pt',
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**tokens)
        probs = torch.nn.functional.softmax(outputs[0], dim=-1)
        # Average over the batch dimension (a single row here).
        probs = probs.mean(dim=0).detach().numpy()
        # Order labels by class id instead of relying on dict ordering, so
        # that label i is paired with probability i.
        id2label = self.model.config.id2label
        labels = [id2label[idx] for idx in sorted(id2label)]
        return pd.Series(probs, index=labels, name='Predicted Probability')

    def run(self, text):
        """Classify ``text`` and explain it; returns ``(preds, html)``."""
        preds = self.classify(text)
        html = self.justify(text)
        return preds, html
keyword_extraction.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ import pytextrank
3
+ import re
4
+ from operator import itemgetter
5
+
6
+
7
class KeywordExtractor:
    """Extract TextRank key phrases and mark where they occur in the text."""

    def __init__(self):
        """Load the spaCy pipeline and append the ``textrank`` component."""
        self.nlp = spacy.load("en_core_web_sm")
        self.nlp.add_pipe("textrank")

    def get_keywords(self, text, max_keywords):
        """Return the text of the first ``max_keywords`` ranked phrases."""
        doc = self.nlp(text)
        return [phrase.text for phrase in doc._.phrases[:max_keywords]]

    def get_keyword_indicies(self, string_list, text):
        """Find every literal occurrence of each string in ``text``.

        Returns a list of ``[start, end]`` pairs (``end`` exclusive), grouped
        in the order the strings were given.
        """
        out = []
        for needle in string_list:
            if not needle:
                # An empty pattern matches at every position; skip it.
                continue
            pattern = re.escape(needle)
            out.extend([m.start(), m.end()] for m in re.finditer(pattern, text))
        return out

    def merge_overlapping_indicies(self, indicies):
        """Merge spans that overlap, touch, or are one character apart.

        The input is not modified. Returns a new list of ``[start, end]``
        pairs sorted by start; an empty input yields an empty list.
        """
        if not indicies:
            return []
        # Copy each pair so merging never writes into the caller's lists.
        ordered = sorted(list(span) for span in indicies)
        merged = [ordered[0]]
        for span in ordered[1:]:
            last = merged[-1]
            # Spans are sorted by start, so "start <= last end + 1" covers
            # overlap, adjacency and a one-character gap.
            if span[0] <= last[-1] + 1:
                last[-1] = max(last[-1], span[-1])
            else:
                merged.append(span)
        return merged

    def merge_until_finished(self, indicies):
        """Apply ``merge_overlapping_indicies`` until nothing more merges."""
        merged = self.merge_overlapping_indicies(indicies)
        while True:
            again = self.merge_overlapping_indicies(merged)
            if len(again) == len(merged):
                return again
            merged = again

    def get_annotation(self, text, indicies, kws):
        """Split ``text`` into plain strings and highlighted keyword tuples.

        Parameters
        ----------
        text : str
            The original text.
        indicies : list
            Non-overlapping ``[start, end]`` spans to highlight.
        kws : list
            Unused; kept so existing callers keep working.

        Returns
        -------
        list
            Alternating plain segments (``str``, possibly empty) and
            ``(keyword_text, "KEY", "#26aaef")`` tuples, in text order.
        """
        # Slice the text directly rather than splicing marker strings into
        # it, so text that happens to contain a marker cannot break the split.
        pieces = []
        cursor = 0
        for start, end in sorted(indicies):
            pieces.append(text[cursor:start])
            pieces.append((text[start:end], "KEY", "#26aaef"))
            cursor = end
        pieces.append(text[cursor:])
        return pieces

    def generate(self, text, max_keywords):
        """Extract keywords and build the annotation for ``text``.

        Returns ``(annotation, kws)``; ``annotation`` is ``None`` when none
        of the keywords occurs literally in the text.
        """
        kws = self.get_keywords(text, max_keywords)
        indicies = self.get_keyword_indicies(kws, text)
        if not indicies:
            return None, kws
        indicies_merged = self.merge_until_finished(indicies)
        return self.get_annotation(text, indicies_merged, kws), kws
84
+
named_entity_recognition.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
2
+ from transformers import pipeline
3
+
4
+
5
class NamedEntityRecognition():
    """Named-entity tagger that also produces display-ready annotations."""

    def __init__(self):
        """Build the NER pipeline from the XLM-RoBERTa CoNLL-03 checkpoint."""
        tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
        model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
        self.nlp = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)

    def get_annotation(self, preds, text):
        """Split ``text`` into plain segments and tagged entity tuples.

        Parameters
        ----------
        preds : list of dict
            Predictions carrying ``'start'``, ``'end'``, ``'word'`` and
            ``'entity_group'`` keys.
        text : str
            The text the predictions refer to.

        Returns
        -------
        list
            Plain segments (``str``, possibly empty) alternating with
            ``(span_text, entity_group, "")`` tuples, in text order.
            Predictions covering only punctuation/whitespace stay plain.
        """
        # Predictions that are not real entities and should not be tagged.
        exclude = {'', '.', '. ', ' '}

        final_annotation = []
        cursor = 0
        for pred in sorted(preds, key=lambda p: p['start']):
            start, end = pred['start'], pred['end']
            final_annotation.append(text[cursor:start])
            span = text[start:end]
            # Tag by position instead of matching the reported word against
            # the slice: the two can differ, and the same word may occur
            # with different entity groups.
            if span in exclude or pred.get('word', span) in exclude:
                final_annotation.append(span)
            else:
                final_annotation.append((span, pred['entity_group'], ""))
            cursor = end
        final_annotation.append(text[cursor:])

        return final_annotation

    def classify(self, text):
        """Run the pipeline on ``text``; returns ``(preds, annotation)``."""
        preds = self.nlp(text)
        ner_annotation = self.get_annotation(preds, text)
        return preds, ner_annotation
part_of_speech_tagging.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.tokenize import word_tokenize
3
+ nltk.download('punkt')
4
+ nltk.download('averaged_perceptron_tagger')
5
+
6
+
7
class POSTagging():
    """Part-of-speech tagger backed by NLTK."""

    def __init__(self):
        """Nothing to set up; no per-instance state is kept."""

    def classify(self, text):
        """Tokenize ``text`` and return the result of ``nltk.pos_tag``."""
        return nltk.pos_tag(word_tokenize(text))
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ news-please~=1.5.20
2
+ sklearn~=0.0
3
+ keybert~=0.5.1
4
+ tensorflow
5
+ tensorflow-hub~=0.12.0
6
+ nltk~=3.5
7
+ gradio~=3.0
8
+ typing-extensions==3.10.0.2
9
+ yake~=0.4.8
10
+ streamlit-option-menu~=0.3.2
11
+ streamlit-option-menu~=0.3.2
12
+ st-annotated-text~=3.0.0
13
+ transformers-interpret~=0.7.2
14
+ htbuilder==0.6.0
15
+ pytextrank
sentiment_analysis.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
2
+ from transformers_interpret import SequenceClassificationExplainer
3
+ import torch
4
+ import pandas as pd
5
+
6
+
7
class SentimentAnalysis():
    """Sentiment classifier with an attached attribution explainer.

    Wraps the ``cardiffnlp/twitter-roberta-base-sentiment`` checkpoint and
    relabels its three classes as Negative / Neutral / Positive.
    """

    def __init__(self, chunksize=512):
        """Load the tokenizer, model and explainer.

        Parameters
        ----------
        chunksize : int
            Maximum number of tokens ``classify`` sends to the model; longer
            inputs are truncated to this length.
        """
        # Load Tokenizer & Model
        hub_location = 'cardiffnlp/twitter-roberta-base-sentiment'
        self.chunksize = chunksize
        self.tokenizer = AutoTokenizer.from_pretrained(hub_location)
        self.model = AutoModelForSequenceClassification.from_pretrained(hub_location)

        # Replace both label maps outright. Renaming the individual
        # "LABEL_n" keys would raise KeyError whenever the loaded config
        # does not contain exactly those keys.
        self.model.config.id2label = {0: "Negative", 1: "Neutral", 2: "Positive"}
        self.model.config.label2id = {"Negative": 0, "Neutral": 1, "Positive": 2}

        # Instantiate explainer (after relabelling, so it sees the new names)
        self.explainer = SequenceClassificationExplainer(self.model, self.tokenizer)

    def justify(self, text):
        """Compute attributions for ``text`` and return their visualization.

        Returns whatever ``explainer.visualize`` returns (the app calls
        ``_repr_html_()`` on it).
        """
        # The explainer must be called first; visualize() renders the
        # attributions produced by the most recent call.
        self.explainer(text)
        # NOTE(review): the path argument presumably also writes
        # "example.html" to the working directory on every call - confirm
        # against transformers_interpret before relying on or removing it.
        return self.explainer.visualize("example.html")

    def classify(self, text):
        """Return the predicted probability of each sentiment class.

        Parameters
        ----------
        text : str
            Text to classify; truncated to ``self.chunksize`` tokens.

        Returns
        -------
        pandas.Series
            Probabilities indexed by ``Negative``, ``Neutral``, ``Positive``
            and named ``'Predicted Probability'``.
        """
        tokens = self.tokenizer.encode_plus(
            text,
            add_special_tokens=False,
            truncation=True,
            max_length=self.chunksize,
            return_tensors='pt',
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = self.model(**tokens)
        probs = torch.nn.functional.softmax(outputs[0], dim=-1)
        # Average over the batch dimension (a single row here).
        probs = probs.mean(dim=0).detach().numpy()
        return pd.Series(probs, index=["Negative", "Neutral", "Positive"], name='Predicted Probability')

    def run(self, text):
        """Classify ``text`` and explain it; returns ``(preds, html)``."""
        preds = self.classify(text)
        html = self.justify(text)
        return preds, html
streamlit_app.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from text_annotation import annotated_text
4
+ from streamlit_option_menu import option_menu
5
+ from sentiment_analysis import SentimentAnalysis
6
+ from keyword_extraction import KeywordExtractor
7
+ from part_of_speech_tagging import POSTagging
8
+ from emotion_detection import EmotionDetection
9
+ from named_entity_recognition import NamedEntityRecognition
10
+
11
# CSS that hides Streamlit's main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# ``st.cache`` is deprecated in newer Streamlit releases in favour of
# ``st.cache_resource`` for unserializable objects such as models. Prefer the
# new decorator when the installed Streamlit has it; fall back otherwise.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def load_sentiment_model():
    """Return the cached SentimentAnalysis instance."""
    return SentimentAnalysis()


@_cache_model
def load_keyword_model():
    """Return the cached KeywordExtractor instance."""
    return KeywordExtractor()


@_cache_model
def load_pos_model():
    """Return the cached POSTagging instance."""
    return POSTagging()


@_cache_model
def load_emotion_model():
    """Return the cached EmotionDetection instance."""
    return EmotionDetection()


@_cache_model
def load_ner_model():
    """Return the cached NamedEntityRecognition instance."""
    return NamedEntityRecognition()
39
+
40
+
41
# Models are fetched through the cached load_*_model() functions only on the
# page that needs them, so opening the app does not load all five models
# before anything is drawn.


def _task_page(title, gif_url):
    """Draw the header, GIF and input box shared by every task page.

    Returns the current contents of the text box.
    """
    st.header(title)
    st.markdown("![Alt Text](" + gif_url + ")")
    st.write("\n\n\n")
    return st.text_area("Paste text here", value="")


def _text_ready(text):
    """Return True when ``text`` has content; otherwise warn the user."""
    if text.strip():
        return True
    st.warning("Please paste some text before pressing Start!")
    return False


# Sidebar navigation; ``page`` holds the label of the selected entry.
with st.sidebar:
    page = option_menu(menu_title='Menu',
                       menu_icon="robot",
                       options=["Welcome!",
                                "Sentiment Analysis",
                                "Keyword Extraction",
                                "Part of Speech Tagging",
                                "Emotion Detection",
                                "Named Entity Recognition"],
                       icons=["house-door",
                              "emoji-heart-eyes",
                              "key",
                              "chat-dots",
                              "emoji-heart-eyes",
                              "building"],
                       default_index=0
                       )

st.title('Open-source NLP')

if page == "Welcome!":
    st.header('Welcome!')
    st.write(
        """
        Supercharge your workflow with this platform built using 100% open-source resources!
        """
    )

    st.markdown("![Alt Text](https://media.giphy.com/media/2fEvoZ9tajMxq/giphy.gif)")
    st.write("\n\n\n")
    st.subheader("Introduction")
    st.write("""
        Welcome! This application is a celebration of open-source and the power that programmers have been granted today
        by those who give back to the community. This tool was constructed using Streamlit, Huggingface Transformers,
        Transformers-Interpret, NLTK, Spacy, amongst other open-source Python libraries and models.

        Utilizing this tool you will be able to perform a multitude of Natural Language Processing Tasks on a range of
        different tasks. All you need to do is paste your input, select your task, and hit the start button!

        * This application currently supports:
            * Sentiment Analysis
            * Keyword Extraction
            * Part of Speech Tagging
            * Emotion Detection
            * Named Entity Recognition

        More features may be added in the future, depending on community feedback. Please reach out to me at
        [email protected] or at my Linkedin page listed below if you have ideas or suggestions for improvement.

        If you would like to contribute yourself, feel free to fork the Github repository listed below and submit a merge request.
        """
    )
    st.subheader("Notes")
    st.write(
        """
        * This dashboard was constructed by Jacob Miesner, but every resource used is open-source! If you are interested
        in his other works you can view them here:

           [Project Github](https://github.com/MiesnerJacob/nlp-dashboard)

           [Jacob Miesner's Github](https://github.com/MiesnerJacob)

           [Jacob Miesner's Linkedin](https://www.linkedin.com/in/jacob-miesner-885050125/)

           [Jacob Miesner's Website](https://www.jacobmiesner.com)

        * The prediction justification for some of the tasks are printed as the model views them. For this reason the text
        may contain special tokens like [CLS] or [SEP] or even hashtags splitting words. If you are knowledgeable about
        language models and how they work these will be familiar, if you do not have prior experience with language models
        you can ignore these characters.
        """
    )

elif page == "Sentiment Analysis":
    text = _task_page('Sentiment Analysis',
                      "https://media.giphy.com/media/XIqCQx02E1U9W/giphy.gif")

    if st.button('Start!') and _text_ready(text):
        with st.spinner("Loading..."):
            preds, html = load_sentiment_model().run(text)
        st.success('All done!')
        st.write("")
        st.subheader("Sentiment Predictions")
        st.bar_chart(data=preds, width=0, height=0, use_container_width=True)
        st.write("")
        st.subheader("Sentiment Justification")
        raw_html = html._repr_html_()
        st.components.v1.html(raw_html)

elif page == "Keyword Extraction":
    text = _task_page('Keyword Extraction',
                      "https://media.giphy.com/media/xT9C25UNTwfZuk85WP/giphy-downsized-large.gif")

    max_keywords = st.slider('# of Keywords Max Limit', min_value=1, max_value=10, value=5, step=1)

    if st.button('Start!') and _text_ready(text):
        with st.spinner("Loading..."):
            annotation, keywords = load_keyword_model().generate(text, max_keywords)
        st.success('All done!')

        # ``annotation`` is None when no keyword occurs literally in the text.
        if annotation:
            st.subheader("Keyword Annotation")
            st.write("")
            annotated_text(*annotation)
            st.text("")

        st.subheader("Extracted Keywords")
        st.write("")
        df = pd.DataFrame(keywords, columns=['Extracted Keywords'])
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button('Download Keywords to CSV', csv, file_name='news_intelligence_keywords.csv')

        st.table(df)

elif page == "Part of Speech Tagging":
    text = _task_page('Part of Speech Tagging',
                      "https://media.giphy.com/media/WoWm8YzFQJg5i/giphy.gif")

    if st.button('Start!') and _text_ready(text):
        with st.spinner("Loading..."):
            preds = load_pos_model().classify(text)
        st.success('All done!')
        st.write("")
        st.subheader("Part of Speech tags")
        annotated_text(*preds)
        st.write("")
        # Embedded reference table explaining the Penn Treebank tag names.
        st.components.v1.iframe('https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html', height=1000)

elif page == "Emotion Detection":
    text = _task_page('Emotion Detection',
                      "https://media.giphy.com/media/fU8X6ozSszyEw/giphy.gif")

    if st.button('Start!') and _text_ready(text):
        with st.spinner("Loading..."):
            preds, html = load_emotion_model().run(text)
        st.success('All done!')
        st.write("")
        st.subheader("Emotion Predictions")
        st.bar_chart(data=preds, width=0, height=0, use_container_width=True)
        raw_html = html._repr_html_()
        st.write("")
        st.subheader("Emotion Justification")
        st.components.v1.html(raw_html, height=500)

elif page == "Named Entity Recognition":
    text = _task_page('Named Entity Recognition',
                      "https://media.giphy.com/media/lxO8wdWdu4tig/giphy.gif")

    if st.button('Start!') and _text_ready(text):
        with st.spinner("Loading..."):
            preds, ner_annotation = load_ner_model().classify(text)
        st.success('All done!')
        st.write("")
        st.subheader("NER Predictions")
        annotated_text(*ner_annotation)
        st.write("")
        st.subheader("NER Prediction Metadata")
        st.write(preds)
text_annotation.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from text_annotation_utils import *
3
+
4
def annotated_text(*args, type=None):
    """Writes text with annotations into your Streamlit app.

    Parameters
    ----------
    *args : str, tuple or htbuilder.HtmlElement
        Arguments can be:
        - strings, to draw the string as-is on the screen.
        - tuples of the form (main_text, annotation_text, background, color) where
          background and foreground colors are optional and should be an CSS-valid string such as
          "#aabbcc" or "rgb(10, 20, 30)"
        - HtmlElement objects in case you want to customize the annotations further. In particular,
          you can import the `annotation()` function from this module to easily produce annotations
          whose CSS you can customize via keyword arguments.
    type : str or None
        'title' wraps the output in ``<p class="big-font">``, 'description'
        in ``<p class="medium-font">``; any other value writes it unwrapped.

    Examples
    --------
    # >>> annotated_text(
    # ...     "This ",
    # ...     ("is", "verb", "#8ef"),
    # ...     " some ",
    # ...     ("annotated", "adj", "#faa"),
    # ...     ("text", "noun", "#afa"),
    # ...     " for those of ",
    # ...     ("you", "pronoun", "#fea"),
    # ...     " who ",
    # ...     ("like", "verb", "#8ef"),
    # ...     " this sort of ",
    # ...     ("thing", "noun", "#afa"),
    # ... )
    # >>> annotated_text(
    # ...     "Hello ",
    # ...     annotation("world!", "noun", color="#8ef", border="1px dashed red"),
    # ... )
    """
    annotated_html = get_annotated_html(*args)

    # The three cases are mutually exclusive, so the content is written
    # exactly once whichever ``type`` is given.
    if type == 'title':
        annotated_html = '<p class="big-font">' + annotated_html + '</p>'
    elif type == 'description':
        annotated_html = '<p class="medium-font">' + annotated_html + '</p>'

    st.markdown(
        annotated_html,
        unsafe_allow_html=True,
    )
text_annotation_utils.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import html
2
+ from htbuilder import H, HtmlElement, styles
3
+ from htbuilder.units import unit
4
+
5
# Element factories and CSS unit helpers are taken as attributes of ``H`` and
# ``unit``; the direct forms ``from htbuilder import div, span`` and
# ``from htbuilder.units import px, rem, em`` only work in 3.7+.
div, span = H.div, H.span
px, rem, em = unit.px, unit.rem, unit.em

# Colors from the Streamlit palette.
# These are red-70, orange-70, ..., violet-70, gray-70.
PALETTE = [
    "#ff4b4b",
    "#ffa421",
    "#ffe312",
    "#21c354",
    "#00d4b1",
    "#00c0f2",
    "#1c83e1",
    "#803df5",
    "#808495",
]

# Two-digit hex suffixes appended to a palette color.
OPACITIES = ["33", "66"]
31
+
32
def annotation(body, label="", background=None, color=None, **style):
    """Build an HtmlElement span object with the given body and annotation label.

    The end result will look something like this:
        [body | label]

    Parameters
    ----------
    body : string
        The string to put in the "body" part of the annotation.
    label : string
        The string to put in the "label" part of the annotation.
    background : string or None
        The color to use for the background "chip" containing this annotation.
        If falsy, a palette color and opacity are picked from the sum of the
        label's character codes, so equal labels get equal colors.
    color : string or None
        The color to use for the body and label text.
        If falsy, no text color is set on the chip.
    style : dict
        Any extra CSS to apply to the containing "chip", for example
        ``border="1px dashed red"``.

    Examples
    --------
    Produce a simple annotation with default colors:
    # >>> annotation("apple", "fruit")
    Produce an annotation with custom colors:
    # >>> annotation("apple", "fruit", background="#FF0", color="black")
    Produce an annotation with crazy CSS:
    # >>> annotation("apple", "fruit", background="#FF0", border="1px dashed red")
    """
    text_color = {'color': color} if color else {}

    if not background:
        seed = sum(map(ord, label))
        background = PALETTE[seed % len(PALETTE)] + OPACITIES[seed % len(OPACITIES)]

    chip_style = styles(
        background=background,
        border_radius=rem(0.33),
        padding=(rem(0.125), rem(0.5)),
        overflow="hidden",
        **text_color,
        **style,
    )
    label_wrapper_style = styles(
        padding_left=rem(0.5),
        text_transform="uppercase",
    )
    label_text_style = styles(
        font_size=em(0.67),
        opacity=0.5,
    )

    # Body and label are escaped because they are emitted as raw HTML.
    label_element = span(style=label_wrapper_style)(
        span(style=label_text_style)(
            html.escape(label),
        ),
    )
    return span(style=chip_style)(
        html.escape(body),
        label_element,
    )
99
+
100
+
101
def get_annotated_html(*args):
    """Writes text with annotations into an HTML string.

    Parameters
    ----------
    *args : see annotated_text()

    Returns
    -------
    str
        An HTML string.

    Raises
    ------
    TypeError
        If an argument is not a str, a tuple or an HtmlElement.
    """

    out = div()

    for arg in args:
        if isinstance(arg, str):
            # Plain text is escaped because the result is rendered as HTML.
            out(html.escape(arg))

        elif isinstance(arg, HtmlElement):
            out(arg)

        elif isinstance(arg, tuple):
            # Tuples are unpacked into annotation(body, label, ...).
            out(annotation(*arg))

        else:
            # TypeError is still an Exception, so existing handlers keep
            # working; the message now names the offending type.
            raise TypeError(
                "Bad input: expected str, tuple or HtmlElement, got "
                + type(arg).__name__
            )

    return str(out)