File size: 4,206 Bytes
4b75840
 
 
 
67caa78
4b75840
 
 
620af8b
 
 
 
 
 
 
4b75840
67caa78
4b75840
 
 
620af8b
 
 
 
 
 
 
 
 
 
4b75840
 
 
 
 
 
 
620af8b
 
 
 
 
 
 
 
 
 
4b75840
 
 
 
 
 
 
 
620af8b
 
 
 
 
 
 
 
 
 
4b75840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620af8b
 
 
 
 
 
 
 
 
 
4b75840
 
 
 
 
 
 
 
 
 
620af8b
 
 
 
 
4b75840
620af8b
 
 
4b75840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620af8b
 
 
 
 
 
 
 
 
4b75840
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import spacy
import pytextrank
import re
from operator import itemgetter
import en_core_web_sm


class KeywordExtractor:
    """Extract key phrases from a text and mark where they occur in it.

    Phrases come from a spaCy pipeline (``en_core_web_sm``) with the
    ``textrank`` component added. Their occurrences in the text are located,
    merged into non-overlapping character spans, and turned into a list that
    alternates plain strings with ``(text, label, colour)`` tuples.

    Attributes:
        KEYWORD_LABEL (str): (class attribute) Label placed in every
            highlighted tuple.
        KEYWORD_COLOR (str): (class attribute) Colour string placed in every
            highlighted tuple (presumably a hex colour for the renderer).
        nlp: The spaCy pipeline used to compute the phrases.
    """

    # Kept as class attributes so a subclass can restyle the highlights
    # without touching the annotation logic.
    KEYWORD_LABEL = "KEY"
    KEYWORD_COLOR = "#26aaef"

    def __init__(self):
        """Load the English spaCy model and add the ``textrank`` component."""
        self.nlp = en_core_web_sm.load()
        # NOTE: the "textrank" factory is presumably registered as a side
        # effect of the module-level ``import pytextrank``.
        self.nlp.add_pipe("textrank")

    def get_keywords(self, text, max_keywords):
        """Return the text of the first ``max_keywords`` TextRank phrases.

        Parameters:
            text (str): The document to analyse.
            max_keywords (int): Maximum number of phrases to return.

        Returns:
            list of str: ``phrase.text`` for the leading entries of
            ``doc._.phrases``, in the order the pipeline provides them.
        """
        doc = self.nlp(text)
        return [phrase.text for phrase in doc._.phrases[:max_keywords]]

    def get_keyword_indicies(self, string_list, text):
        """Locate every literal occurrence of each string inside ``text``.

        Parameters:
            string_list (list of str): Strings to search for. They are
                matched literally (regex metacharacters are escaped).
            text (str): The text to search in.

        Returns:
            list of [int, int]: ``[start, end]`` character spans (``end``
            exclusive), grouped by search string in the order given. Spans of
            different strings may overlap.
        """
        out = []
        for needle in string_list:
            if not needle:
                # An empty pattern matches at every position, which would end
                # up highlighting the whole text once the spans are merged.
                continue
            pattern = re.escape(needle)
            out.extend([m.start(), m.end()] for m in re.finditer(pattern, text))
        return out

    def merge_overlapping_indicies(self, indicies):
        """Merge spans that overlap, touch, or are one character apart.

        Two spans are joined when the later one starts no further than one
        position past the end of the earlier one, so keywords separated by a
        single character (e.g. a space) become one highlight.

        The input is not modified: neither the outer list nor the spans in it.

        Parameters:
            indicies (list of [int, int]): ``[start, end]`` spans in any order.

        Returns:
            list of [int, int]: New, merged spans sorted by start. Empty when
            ``indicies`` is empty.
        """
        merged = []
        for span in sorted(indicies):
            start, end = span[0], span[-1]
            # After sorting, start >= merged[-1][0] always holds, so only the
            # distance to the previous end needs checking.
            if merged and start <= merged[-1][-1] + 1:
                merged[-1][-1] = max(merged[-1][-1], end)
            else:
                merged.append([start, end])
        return merged

    def merge_until_finished(self, indicies):
        """Return the fully merged spans for ``indicies``, sorted by start.

        A single pass of ``merge_overlapping_indicies`` over the sorted spans
        is already a fixed point (every remaining pair is more than one
        position apart), so no repeated merging is required.

        Parameters:
            indicies (list of [int, int]): ``[start, end]`` spans in any order.

        Returns:
            list of [int, int]: Merged, non-overlapping spans sorted by start.
        """
        return self.merge_overlapping_indicies(indicies)

    def get_annotation(self, text, indicies, kws):
        """Split ``text`` into plain and highlighted segments.

        The text is sliced directly at the span boundaries, so its content
        (including any marker-like substrings) is passed through unchanged.

        Parameters:
            text (str): The original text.
            indicies (list of [int, int]): ``[start, end]`` spans to highlight
                (``end`` exclusive), expected to be non-overlapping as
                produced by ``merge_until_finished``. Overlapping parts are
                clipped so no character is emitted twice.
            kws (list of str): Unused; kept for interface compatibility.

        Returns:
            list: Plain ``str`` segments alternating with
            ``(segment, KEYWORD_LABEL, KEYWORD_COLOR)`` tuples. A plain
            segment (possibly empty) precedes every tuple and one follows the
            last tuple.
        """
        pieces = []
        cursor = 0
        for start, end in sorted(indicies):
            start = max(start, cursor)
            if end < start:
                # Span lies entirely inside text that was already emitted.
                continue
            pieces.append(text[cursor:start])
            pieces.append((text[start:end], self.KEYWORD_LABEL, self.KEYWORD_COLOR))
            cursor = end
        pieces.append(text[cursor:])
        return pieces

    def generate(self, text, max_keywords):
        """Extract keywords from ``text`` and build its annotation.

        Parameters:
            text (str): The document to analyse.
            max_keywords (int): Maximum number of keywords to extract.

        Returns:
            tuple: ``(annotation, kws)`` where ``kws`` is the list of keyword
            strings and ``annotation`` is the list from ``get_annotation``,
            or ``None`` when no keyword occurrence was found in ``text``.
        """
        kws = self.get_keywords(text, max_keywords)
        indicies = self.get_keyword_indicies(kws, text)
        if not indicies:
            return None, kws

        merged = self.merge_until_finished(indicies)
        return self.get_annotation(text, merged, kws), kws