Update functions.py
Browse files — functions.py (+7, −2)
functions.py
CHANGED
@@ -132,7 +132,7 @@ def load_models():
|
|
132 |
ner_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
|
133 |
emb_tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xl')
|
134 |
sent_pipe = pipeline("text-classification",model=q_model, tokenizer=q_tokenizer)
|
135 |
-
sum_pipe = pipeline("summarization",model="philschmid/flan-t5-base-samsum")
|
136 |
ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, grouped_entities=True)
|
137 |
cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1') #cross-encoder/ms-marco-MiniLM-L-12-v2
|
138 |
sbert = SentenceTransformer('all-MiniLM-L6-v2')
|
@@ -366,7 +366,12 @@ def chunk_and_preprocess_text(text, model_name= 'philschmid/flan-t5-base-samsum'
|
|
366 |
def summarize_text(text_to_summarize,max_len,min_len):
|
367 |
'''Summarize text with HF model'''
|
368 |
|
369 |
-
summarized_text = sum_pipe(text_to_summarize,
|
|
|
|
|
|
|
|
|
|
|
370 |
summarized_text = ' '.join([summ['summary_text'] for summ in summarized_text])
|
371 |
|
372 |
return summarized_text
|
|
|
132 |
ner_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english")
|
133 |
emb_tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-xl')
|
134 |
sent_pipe = pipeline("text-classification",model=q_model, tokenizer=q_tokenizer)
|
135 |
+
sum_pipe = pipeline("summarization",model="philschmid/flan-t5-base-samsum",clean_up_tokenization_spaces=True)
|
136 |
ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, grouped_entities=True)
|
137 |
cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1') #cross-encoder/ms-marco-MiniLM-L-12-v2
|
138 |
sbert = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
def summarize_text(text_to_summarize, max_len, min_len):
    """Summarize text with the module-level HF summarization pipeline.

    Parameters
    ----------
    text_to_summarize : str or list of str
        Text (or pre-chunked texts) to feed to `sum_pipe`.
    max_len : int
        Upper bound on generated summary length (tokens), passed as `max_length`.
    min_len : int
        Lower bound on generated summary length (tokens), passed as `min_length`.

    Returns
    -------
    str
        The per-item summaries joined with single spaces.
    """
    # Deterministic beam-search decoding (no sampling) with 4 beams;
    # early_stopping ends beams as soon as num_beams candidates finish.
    outputs = sum_pipe(
        text_to_summarize,
        max_length=max_len,
        min_length=min_len,
        do_sample=False,
        early_stopping=True,
        num_beams=4,
    )
    # The pipeline returns a list of {'summary_text': ...} dicts — one per
    # input chunk; merge them into a single summary string.
    return ' '.join(item['summary_text'] for item in outputs)
|