nickmuchi committed on
Commit
770e1bd
·
1 Parent(s): c230efc

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +18 -17
functions.py CHANGED
@@ -97,7 +97,7 @@ prompt = ChatPromptTemplate.from_messages(messages)
97
 
98
  ###################### Functions #######################################################################################
99
 
100
- @st.experimental_singleton(suppress_st_warning=True)
101
  def load_models():
102
 
103
  '''Load and cache all the models to be used'''
@@ -116,20 +116,20 @@ def load_models():
116
 
117
  return sent_pipe, sum_pipe, ner_pipe, cross_encoder, kg_model, kg_tokenizer, emb_tokenizer, sbert
118
 
119
- @st.experimental_singleton(suppress_st_warning=True)
120
  def load_asr_model(asr_model_name):
121
  asr_model = whisper.load_model(asr_model_name)
122
 
123
  return asr_model
124
 
125
- @st.experimental_singleton(suppress_st_warning=True)
126
  def load_whisper_api(audio):
127
  file = open(audio, "rb")
128
  transcript = openai.Audio.translate("whisper-1", file)
129
 
130
  return transcript
131
 
132
- @st.experimental_singleton(suppress_st_warning=True)
133
  def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
134
 
135
  '''Process text for Semantic Search'''
@@ -144,7 +144,7 @@ def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
144
 
145
  return docsearch
146
 
147
- @st.experimental_singleton(suppress_st_warning=True)
148
  def chunk_and_preprocess_text(text,thresh=500):
149
 
150
  """Chunk text longer than n tokens for summarization"""
@@ -170,7 +170,7 @@ def chunk_and_preprocess_text(text,thresh=500):
170
 
171
  return chunks
172
 
173
- @st.experimental_singleton(suppress_st_warning=True)
174
  def gen_embeddings(embedding_model):
175
 
176
  '''Generate embeddings for given model'''
@@ -187,7 +187,7 @@ def gen_embeddings(embedding_model):
187
 
188
  return embeddings
189
 
190
- @st.experimental_memo(suppress_st_warning=True)
191
  def embed_text(query,title,embedding_model,_docsearch):
192
 
193
  '''Embed text and generate semantic search scores'''
@@ -212,12 +212,12 @@ def embed_text(query,title,embedding_model,_docsearch):
212
 
213
  return answer
214
 
215
- @st.experimental_singleton(suppress_st_warning=True)
216
  def get_spacy():
217
  nlp = en_core_web_lg.load()
218
  return nlp
219
 
220
- @st.experimental_memo(suppress_st_warning=True)
221
  def inference(link, upload, _asr_model):
222
  '''Convert Youtube video or Audio upload to text'''
223
 
@@ -307,7 +307,7 @@ def inference(link, upload, _asr_model):
307
  return results['text'], yt.title
308
 
309
 
310
- @st.experimental_memo(suppress_st_warning=True)
311
  def sentiment_pipe(earnings_text):
312
  '''Determine the sentiment of the text'''
313
 
@@ -316,7 +316,7 @@ def sentiment_pipe(earnings_text):
316
 
317
  return earnings_sentiment, earnings_sentences
318
 
319
- @st.experimental_memo(suppress_st_warning=True)
320
  def summarize_text(text_to_summarize,max_len,min_len):
321
  '''Summarize text with HF model'''
322
 
@@ -329,7 +329,7 @@ def summarize_text(text_to_summarize,max_len,min_len):
329
 
330
  return summarized_text
331
 
332
- @st.experimental_memo(suppress_st_warning=True)
333
  def clean_text(text):
334
  '''Clean all text'''
335
 
@@ -341,7 +341,7 @@ def clean_text(text):
341
 
342
  return text
343
 
344
- @st.experimental_memo(suppress_st_warning=True)
345
  def chunk_long_text(text,threshold,window_size=3,stride=2):
346
  '''Preprocess text and chunk for sentiment analysis'''
347
 
@@ -378,7 +378,7 @@ def summary_downloader(raw_text):
378
  href = f'<a href="data:file/txt;base64,{b64}" download="{new_filename}">Click to Download!!</a>'
379
  st.markdown(href,unsafe_allow_html=True)
380
 
381
- @st.experimental_memo(suppress_st_warning=True)
382
  def get_all_entities_per_sentence(text):
383
  doc = nlp(''.join(text))
384
 
@@ -401,12 +401,12 @@ def get_all_entities_per_sentence(text):
401
 
402
  return entities_all_sentences
403
 
404
- @st.experimental_memo(suppress_st_warning=True)
405
  def get_all_entities(text):
406
  all_entities_per_sentence = get_all_entities_per_sentence(text)
407
  return list(itertools.chain.from_iterable(all_entities_per_sentence))
408
 
409
- @st.experimental_memo(suppress_st_warning=True)
410
  def get_and_compare_entities(article_content,summary_output):
411
 
412
  all_entities_per_sentence = get_all_entities_per_sentence(article_content)
@@ -454,7 +454,7 @@ def get_and_compare_entities(article_content,summary_output):
454
 
455
  return matched_entities, unmatched_entities
456
 
457
- @st.experimental_memo(suppress_st_warning=True)
458
  def highlight_entities(article_content,summary_output):
459
 
460
  markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
@@ -506,6 +506,7 @@ def fin_ext(text):
506
 
507
  ## Knowledge Graphs code
508
 
 
509
  def extract_relations_from_model_output(text):
510
  relations = []
511
  relation, subject, relation, object_ = '', '', '', ''
 
97
 
98
  ###################### Functions #######################################################################################
99
 
100
+ @st.cache_resource
101
  def load_models():
102
 
103
  '''Load and cache all the models to be used'''
 
116
 
117
  return sent_pipe, sum_pipe, ner_pipe, cross_encoder, kg_model, kg_tokenizer, emb_tokenizer, sbert
118
 
119
+ @st.cache_resource
120
  def load_asr_model(asr_model_name):
121
  asr_model = whisper.load_model(asr_model_name)
122
 
123
  return asr_model
124
 
125
+ @st.cache_data
126
  def load_whisper_api(audio):
127
  file = open(audio, "rb")
128
  transcript = openai.Audio.translate("whisper-1", file)
129
 
130
  return transcript
131
 
132
+ @st.cache_data
133
  def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
134
 
135
  '''Process text for Semantic Search'''
 
144
 
145
  return docsearch
146
 
147
+ @st.cache_data
148
  def chunk_and_preprocess_text(text,thresh=500):
149
 
150
  """Chunk text longer than n tokens for summarization"""
 
170
 
171
  return chunks
172
 
173
+ @st.cache_resource
174
  def gen_embeddings(embedding_model):
175
 
176
  '''Generate embeddings for given model'''
 
187
 
188
  return embeddings
189
 
190
+ @st.cache_data
191
  def embed_text(query,title,embedding_model,_docsearch):
192
 
193
  '''Embed text and generate semantic search scores'''
 
212
 
213
  return answer
214
 
215
+ @st.cache_resource
216
  def get_spacy():
217
  nlp = en_core_web_lg.load()
218
  return nlp
219
 
220
+ @st.cache_data
221
  def inference(link, upload, _asr_model):
222
  '''Convert Youtube video or Audio upload to text'''
223
 
 
307
  return results['text'], yt.title
308
 
309
 
310
+ @st.cache_data
311
  def sentiment_pipe(earnings_text):
312
  '''Determine the sentiment of the text'''
313
 
 
316
 
317
  return earnings_sentiment, earnings_sentences
318
 
319
+ @st.cache_data
320
  def summarize_text(text_to_summarize,max_len,min_len):
321
  '''Summarize text with HF model'''
322
 
 
329
 
330
  return summarized_text
331
 
332
+ @st.cache_data
333
  def clean_text(text):
334
  '''Clean all text'''
335
 
 
341
 
342
  return text
343
 
344
+ @st.cache_data
345
  def chunk_long_text(text,threshold,window_size=3,stride=2):
346
  '''Preprocess text and chunk for sentiment analysis'''
347
 
 
378
  href = f'<a href="data:file/txt;base64,{b64}" download="{new_filename}">Click to Download!!</a>'
379
  st.markdown(href,unsafe_allow_html=True)
380
 
381
+ @st.cache_data
382
  def get_all_entities_per_sentence(text):
383
  doc = nlp(''.join(text))
384
 
 
401
 
402
  return entities_all_sentences
403
 
404
+ @st.cache_data
405
  def get_all_entities(text):
406
  all_entities_per_sentence = get_all_entities_per_sentence(text)
407
  return list(itertools.chain.from_iterable(all_entities_per_sentence))
408
 
409
+ @st.cache_data
410
  def get_and_compare_entities(article_content,summary_output):
411
 
412
  all_entities_per_sentence = get_all_entities_per_sentence(article_content)
 
454
 
455
  return matched_entities, unmatched_entities
456
 
457
+ @st.cache_data
458
  def highlight_entities(article_content,summary_output):
459
 
460
  markdown_start_red = "<mark class=\"entity\" style=\"background: rgb(238, 135, 135);\">"
 
506
 
507
  ## Knowledge Graphs code
508
 
509
+ @st.cache_data
510
  def extract_relations_from_model_output(text):
511
  relations = []
512
  relation, subject, relation, object_ = '', '', '', ''