nickmuchi committed on
Commit
45412ff
·
1 Parent(s): f1d1353

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +13 -13
functions.py CHANGED
@@ -323,7 +323,7 @@ def sentiment_pipe(earnings_text):
323
  return earnings_sentiment, earnings_sentences
324
 
325
  @st.cache_data
326
- def chunk_and_preprocess_text(text, model_name):
327
 
328
  '''Chunk and preprocess text for summarization'''
329
 
@@ -341,22 +341,22 @@ def chunk_and_preprocess_text(text, model_name):
341
  combined_length = len(tokenizer.tokenize(sentence)) + length # add the no. of sentence tokens to the length counter
342
 
343
  if combined_length <= tokenizer.max_len_single_sentence: # if it doesn't exceed
344
- chunk += sentence + " " # add the sentence to the chunk
345
- length = combined_length # update the length counter
346
 
347
  # if it is the last sentence
348
- if count == len(sentences) - 1:
349
- chunks.append(chunk) # save the chunk
350
 
351
  else:
352
- chunks.append(chunk) # save the chunk
353
- # reset
354
- length = 0
355
- chunk = ""
356
-
357
- # take care of the overflow sentence
358
- chunk += sentence + " "
359
- length = len(tokenizer.tokenize(sentence))
360
 
361
  return chunks
362
 
 
323
  return earnings_sentiment, earnings_sentences
324
 
325
  @st.cache_data
326
+ def chunk_and_preprocess_text(text, model_name= 'philschmid/flan-t5-base-samsum'):
327
 
328
  '''Chunk and preprocess text for summarization'''
329
 
 
341
  combined_length = len(tokenizer.tokenize(sentence)) + length # add the no. of sentence tokens to the length counter
342
 
343
  if combined_length <= tokenizer.max_len_single_sentence: # if it doesn't exceed
344
+ chunk += sentence + " " # add the sentence to the chunk
345
+ length = combined_length # update the length counter
346
 
347
  # if it is the last sentence
348
+ if count == len(sentences) - 1:
349
+ chunks.append(chunk) # save the chunk
350
 
351
  else:
352
+ chunks.append(chunk) # save the chunk
353
+ # reset
354
+ length = 0
355
+ chunk = ""
356
+
357
+ # take care of the overflow sentence
358
+ chunk += sentence + " "
359
+ length = len(tokenizer.tokenize(sentence))
360
 
361
  return chunks
362