Spaces:
Sleeping
Sleeping
File size: 4,206 Bytes
4b75840 67caa78 4b75840 620af8b 4b75840 67caa78 4b75840 620af8b 4b75840 620af8b 4b75840 620af8b 4b75840 620af8b 4b75840 620af8b 4b75840 620af8b 4b75840 620af8b 4b75840 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import spacy
import pytextrank
import re
from operator import itemgetter
import en_core_web_sm
class KeywordExtractor:
    """Extract key phrases from a text and mark where they occur in it.

    ``generate`` is the main entry point; the other methods are the
    individual steps it chains together (phrase extraction, span lookup,
    span merging, annotation building).

    Attributes:
        KEYWORD_LABEL (str): (class attribute) Label stored in every keyword
            segment of an annotation.
        KEYWORD_COLOR (str): (class attribute) Colour stored in every keyword
            segment of an annotation.
        nlp: The loaded ``en_core_web_sm`` pipeline with a "textrank"
            component added.
    """

    KEYWORD_LABEL = "KEY"
    KEYWORD_COLOR = "#26aaef"

    def __init__(self):
        self.nlp = en_core_web_sm.load()
        # NOTE(review): "textrank" is looked up by name; presumably the
        # file-level `import pytextrank` is what registers it, so that import
        # must stay even though the name is never referenced directly.
        self.nlp.add_pipe("textrank")

    def get_keywords(self, text, max_keywords):
        """Return the text of the first ``max_keywords`` phrases of ``text``.

        Parameters:
            text (str): The document to analyse.
            max_keywords (int): Maximum number of phrases to return.

        Returns:
            list: Phrase strings, in the order ``doc._.phrases`` lists them.
        """
        doc = self.nlp(text)
        return [phrase.text for phrase in doc._.phrases[:max_keywords]]

    def get_keyword_indicies(self, string_list, text):
        """Locate every literal occurrence of each keyword in ``text``.

        Matching is exact and case-sensitive; keywords are escaped so regex
        metacharacters in them are treated as plain characters.

        Parameters:
            string_list (list): Keywords to search for.
            text (str): The text to search in.

        Returns:
            list: ``[start, end]`` pairs (end exclusive), grouped by keyword
            in the order the keywords were given. The pairs are not sorted
            and may overlap.
        """
        out = []
        for keyword in string_list:
            if not keyword:
                # An empty pattern matches with zero width at every position,
                # which would flood the result with meaningless spans.
                continue
            pattern = re.escape(keyword)
            out.extend([m.start(), m.end()] for m in re.finditer(pattern, text))
        return out

    def merge_overlapping_indicies(self, indicies):
        """Merge spans that overlap, touch, or are one character apart.

        Spans separated by exactly one character (for example the space
        between two adjacent keywords) are joined into a single span.

        Parameters:
            indicies (list): ``[start, end]`` pairs. Not modified.

        Returns:
            list: New ``[start, end]`` lists sorted by start, with no two
            spans closer than two characters. Empty input gives ``[]``.
        """
        merged = []
        for start, end in sorted(indicies, key=lambda span: (span[0], span[1])):
            # Spans are visited in start order, so comparing against the last
            # merged span is sufficient; "+ 1" bridges a one-character gap.
            if merged and start <= merged[-1][1] + 1:
                merged[-1][1] = max(merged[-1][1], end)
            else:
                merged.append([start, end])
        return merged

    def merge_until_finished(self, indicies):
        """Return ``indicies`` fully merged and sorted by start position.

        Parameters:
            indicies (list): ``[start, end]`` pairs. Not modified.

        Returns:
            list: The merged spans, sorted by start.
        """
        # A single pass over start-sorted spans already reaches the fixed
        # point, so no repeated merging is needed.
        return self.merge_overlapping_indicies(indicies)

    def get_annotation(self, text, indicies, kws=None):
        """Split ``text`` into plain and keyword segments.

        Parameters:
            text (str): The original text.
            indicies (list): Non-overlapping ``[start, end]`` spans (end
                exclusive), e.g. the output of ``merge_until_finished``.
            kws: Unused; accepted only so existing callers keep working.

        Returns:
            list: Alternating segments starting and ending with a plain
            string (possibly empty). Plain text is a ``str``; each span is a
            ``(span_text, KEYWORD_LABEL, KEYWORD_COLOR)`` tuple.
        """
        segments = []
        cursor = 0
        for start, end in sorted(indicies, key=lambda span: (span[0], span[1])):
            # Slice the text directly instead of inserting marker strings, so
            # text that happens to contain a marker cannot be mis-split.
            segments.append(text[cursor:start])
            segments.append((text[start:end], self.KEYWORD_LABEL, self.KEYWORD_COLOR))
            cursor = end
        segments.append(text[cursor:])
        return segments

    def generate(self, text, max_keywords):
        """Extract keywords from ``text`` and build its annotation.

        Parameters:
            text (str): The document to analyse.
            max_keywords (int): Maximum number of keywords to extract.

        Returns:
            tuple: ``(annotation, kws)``. ``annotation`` is the segment list
            from ``get_annotation``, or ``None`` when no keyword occurs
            literally in ``text``; ``kws`` is the list of extracted keywords.
        """
        kws = self.get_keywords(text, max_keywords)
        indicies = self.get_keyword_indicies(kws, text)
        if not indicies:
            return None, kws
        merged = self.merge_until_finished(indicies)
        return self.get_annotation(text, merged, kws), kws
|