nickmuchi committed on
Commit
1c1d7c0
·
1 Parent(s): e6fd33c

Update functions.py

Browse files
Files changed (1) hide show
  1. functions.py +5 -3
functions.py CHANGED
@@ -329,6 +329,8 @@ def chunk_and_preprocess_text(text, model_name= 'philschmid/flan-t5-base-samsum'
329
 
330
  tokenizer = AutoTokenizer.from_pretrained(model_name)
331
  sentences = sent_tokenize(text)
 
 
332
 
333
  # initialize
334
  length = 0
@@ -340,9 +342,9 @@ def chunk_and_preprocess_text(text, model_name= 'philschmid/flan-t5-base-samsum'
340
  count += 1
341
  combined_length = len(tokenizer.tokenize(sentence)) + length # add the no. of sentence tokens to the length counter
342
 
343
- if combined_length <= tokenizer.max_len_single_sentence: # if it doesn't exceed
344
- chunk += sentence + " " # add the sentence to the chunk
345
- length = combined_length # update the length counter
346
 
347
  # if it is the last sentence
348
  if count == len(sentences) - 1:
 
329
 
330
  tokenizer = AutoTokenizer.from_pretrained(model_name)
331
  sentences = sent_tokenize(text)
332
+
333
+ print("sentences: {sentences}")
334
 
335
  # initialize
336
  length = 0
 
342
  count += 1
343
  combined_length = len(tokenizer.tokenize(sentence)) + length # add the no. of sentence tokens to the length counter
344
 
345
+ if combined_length <= tokenizer.max_len_single_sentence: # if it doesn't exceed
346
+ chunk += sentence + " " # add the sentence to the chunk
347
+ length = combined_length # update the length counter
348
 
349
  # if it is the last sentence
350
  if count == len(sentences) - 1: