egumasa committed on
Commit
7cf7080
·
1 Parent(s): 2f6a316

updated model

Browse files
.gitignore CHANGED
@@ -1,2 +1,6 @@
1
  test_run.py
2
- .DS_Store
 
 
 
 
 
1
  test_run.py
2
+ .DS_Store
3
+ analyzer.py
4
+ main.py
5
+ results/*
6
+ inputtexts/*
demo.py CHANGED
@@ -32,7 +32,7 @@ st.set_page_config(page_title="ENGAGEMENT analyzer (beta ver 0.3)",
32
  @st.cache(allow_output_mutation=True)
33
  def load_model():
34
  # nlp = spacy.load("en_engagement_RoBERTa_context_flz")
35
- nlp = spacy.load("en_engagement_spl_RoBERTa_acad")
36
  return (nlp)
37
 
38
 
 
32
  @st.cache(allow_output_mutation=True)
33
  def load_model():
34
  # nlp = spacy.load("en_engagement_RoBERTa_context_flz")
35
+ nlp = spacy.load("en_engagement_LSTM")
36
  return (nlp)
37
 
38
 
pipeline/__pycache__/custom_functions.cpython-39.pyc ADDED
Binary file (3.61 kB). View file
 
pipeline/__pycache__/post_processors.cpython-39.pyc CHANGED
Binary files a/pipeline/__pycache__/post_processors.cpython-39.pyc and b/pipeline/__pycache__/post_processors.cpython-39.pyc differ
 
pipeline/post_processors.py CHANGED
@@ -19,12 +19,138 @@ def simple_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
19
  return data, columns
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
23
  spans_key: str = "sc",
24
  attrs: List[str] = SPAN_ATTRS):
25
- columns = attrs + ["Conf. score", 'span dep',
26
- "POS", "POS sequence", "head"]
27
  data = []
 
 
28
 
29
  for span, score in zip(doc.spans[spans_key], doc.spans[spans_key].attrs['scores']):
30
 
@@ -32,11 +158,16 @@ def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
32
  span_info.extend([str(getattr(span, attr)) for attr in attrs])
33
 
34
  span_info.append(score)
 
 
35
  span_info.append(span.root.dep_)
 
36
  span_info.append(span.root.tag_)
 
37
  span_info.append("_".join([t.tag_ for t in span]))
38
  span_info.append(span.root.head.norm_)
39
- # span_info.append(span.root.head.dep_ == "ROOT")
 
40
  data.append(span_info)
41
 
42
  return data, columns
 
19
  return data, columns
20
 
21
 
22
+ # def span_info_aggregator()
23
+
24
def construction_classifier(doc, span):
    """Return a label for the grammatical construction realised by ``span``.

    The label starts out as the dependency relation of ``span.root`` and is
    then overwritten by the rules below. The rules are applied in a fixed
    order and the last one that matches wins (for example a root tagged
    ``MD`` that is also a ``prep`` ends up as a prepositional phrase).

    NOTE(review): this block was rebuilt from a rendered diff in which leading
    whitespace had been lost. Nesting is certain wherever an inner test uses a
    name bound immediately above it; the places where it had to be inferred
    are marked with NOTE(review) comments - please confirm them.

    Args:
        doc: Not used by the function; kept so existing callers keep working.
        span: Object with ``root`` and ``label_`` attributes that iterates
            over tokens exposing ``norm_``, ``dep_`` and ``tag_``. The root
            token (and its heads) must additionally expose ``pos_``,
            ``morph``, ``head`` and ``children``.

    Returns:
        The category string; ``span.root.dep_`` when no rule matches.
    """
    present_participle = "Aspect=Prog|Tense=Pres|VerbForm=Part"
    verbal_pos = ("AUX", "VERB")

    category = span.root.dep_
    spanroot = span.root

    # Token-level views of the span.
    span_t_dep_ = ["_".join([t.norm_, t.dep_]) for t in span]
    span_dep = [t.dep_ for t in span]
    span_token = [t.norm_ for t in span]
    span_tag = [t.tag_ for t in span]

    # Children are read from the span's own root, i.e. BEFORE the climb below
    # may move ``spanroot`` to a different token.
    children = list(spanroot.children)
    c_dep = [c.dep_ for c in children]
    c_pos = [c.pos_ for c in children]

    # Nested roots: classify a conjunct by the first non-conjunct head above
    # it, then a possessive by the first non-possessive head above that.
    while spanroot.dep_ == "conj":
        spanroot = spanroot.head
    while spanroot.dep_ == "poss":
        spanroot = spanroot.head

    dep = spanroot.dep_
    has_it_subject = "it_nsubjpass" in span_t_dep_ or "it_nsubj" in span_t_dep_

    # --- Rules evaluated before the modal check ---------------------------
    if dep == "pcomp":
        if str(spanroot.morph) == present_participle:
            category = "Gerund"
    elif dep in ("pobj", "dobj", "obj", "iobj"):
        category = "Object"
    elif dep in ("nsubj", "nsubjpass"):
        category = "Subject"
    elif dep == "cc":
        category = "Coordinating conjunction"
    elif dep in ("ROOT", "advcl"):
        if has_it_subject and (
            ("ccomp" in c_dep and "auxpass" in c_dep)
            or ("nsubj" in c_dep and "acomp" in c_dep)
            or ("nsubj" in c_dep and "oprd" in c_dep)
        ):
            category = "It is X that-clause"
        elif "nsubj" in c_dep and "it" in span_token and spanroot.pos_ == "VERB":
            category = "It VERB that-clause"
        elif "expl" in c_dep and "NOUN" in c_pos:
            category = "There is/are NOUN"
        elif spanroot.pos_ in verbal_pos:
            category = "Main verb"
        else:
            category = spanroot.dep_
    elif dep == "attr":
        head_child_deps = [c.dep_ for c in spanroot.head.children]
        if "expl" in head_child_deps and "no_det" in span_t_dep_:
            category = "There is/are no NOUN"

    # --- Modal verbs (overrides anything set above) -----------------------
    if spanroot.tag_ == "MD":
        category = "Modal auxiliary"

    # --- Rules evaluated after the modal check (override it) --------------
    phrase_labels = {
        "prep": "Prepositional Phrase",
        "advmod": "Adverbial modifier",
        "acomp": "Adjectival complement",
        "neg": "Negative particle",
        "preconj": "Conjunction",
    }
    if dep in phrase_labels:
        category = phrase_labels[dep]
    elif dep in ("advcl", "mark", "acl"):
        # Adverbial clauses.
        # NOTE(review): the two tests are taken to be siblings inside this
        # branch, so a bare present participle wins over the "mark" test.
        if "mark" in span_dep:
            category = "Finite adverbial clause"
        if str(spanroot.morph) == present_participle and "aux" not in c_dep:
            category = "Non-finite adv clause"
    elif dep in ("relcl", "ccomp"):
        head = spanroot.head
        if ";" in [t.norm_ for t in head.children]:
            category = "Main verb"
        elif "nsubj" not in span_dep:
            category = "Dependent verb"
    elif dep == "dep":
        head = spanroot.head
        if (
            head.dep_ in ("ROOT", "ccomp")
            and head.pos_ in verbal_pos
            and spanroot.pos_ in verbal_pos
        ):
            # NOTE(review): the ``else`` is taken to pair with the morphology
            # comparison, not with the outer test.
            if spanroot.morph == head.morph:
                category = "Main verb"
            else:
                category = "Dependent verb"

    # --- Citations (evaluated last, so they override everything) ----------
    if span.label_ == "CITATION":
        if "NNP" in span_tag or "NNPS" in span_tag:
            # NOTE(review): "Other Citation" is taken to be the fallback for
            # spans that DO contain a proper noun; spans without one keep the
            # category computed above.
            if span_dep[0] == "punct" and span_dep[-1] == "punct":
                category = "Parenthetical Citation"
            elif span_tag[0] in ("NNP", "NNPS"):
                category = "Narrative Citation"
            else:
                category = "Other Citation"

    return category
144
+
145
+
146
  def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
147
  spans_key: str = "sc",
148
  attrs: List[str] = SPAN_ATTRS):
149
+ columns = attrs + ["Conf. score", "sent no.", "grammatical realization", 'span dep', "ner",
150
+ "POS", 'span dep seq', "POS sequence", "head", "children", "morphology", ]
151
  data = []
152
+ # data = span_info_aggregator(doc, columns)
153
+ sentences = {s: i for i, s in enumerate(doc.sents)}
154
 
155
  for span, score in zip(doc.spans[spans_key], doc.spans[spans_key].attrs['scores']):
156
 
 
158
  span_info.extend([str(getattr(span, attr)) for attr in attrs])
159
 
160
  span_info.append(score)
161
+ span_info.append(sentences[span.sent])
162
+ span_info.append(construction_classifier(doc, span))
163
  span_info.append(span.root.dep_)
164
+ span_info.append(span.root.ent_type_)
165
  span_info.append(span.root.tag_)
166
+ span_info.append("_".join([t.dep_ for t in span]))
167
  span_info.append("_".join([t.tag_ for t in span]))
168
  span_info.append(span.root.head.norm_)
169
+ span_info.append("_".join([c.dep_ for c in span.root.children]))
170
+ span_info.append(span.root.morph)
171
  data.append(span_info)
172
 
173
  return data, columns
resources/__pycache__/colors.cpython-39.pyc ADDED
Binary file (442 Bytes). View file
 
resources/__pycache__/template_list.cpython-39.pyc ADDED
Binary file (2.35 kB). View file
 
resources/__pycache__/text_list.cpython-39.pyc ADDED
Binary file (121 kB). View file
 
resources/colors.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Maps each span label to the hex colour used when that label is drawn.
# Two pairs of labels intentionally resolve to the same colour:
#   PRONOUNCE / PROCLAIM   -> #92a8d1
#   ATTRIBUTE / ATTRIBUTION -> #f7786b
COLORS_1 = {
    "ENTERTAIN": "#82b74b",
    "DENY": "#c94c4c",
    "COUNTER": "#eea29a",
    "PRONOUNCE": "#92a8d1",
    "ENDORSE": "#034f84",
    "CITATION": "#b2b2b2",
    "MONOGLOSS": "#3e4444",
    "ATTRIBUTE": "#f7786b",
    "ATTRIBUTION": "#f7786b",
    "PROCLAIM": "#92a8d1",
}
13
+
resources/template_list.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# HTML templates filled in with ``str.format``-style placeholders, plus the
# sample text constant.
# NOTE(review): indentation inside the string literals could not be recovered
# from the rendered diff this block was rebuilt from, so every line inside a
# literal is flush-left; confirm against the raw file if whitespace matters.

# One highlighted entity: placeholders {bg}, {text}, {label}.
TPL_ENT = """
<mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
{text}
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">{label}</span>
</mark>
"""

# Outer wrapper for span rendering: placeholders {text}, {span_slices}, {span_starts}.
TPL_SPANS = """
<div class="spans" style="line-height: 4.5;">
{text}
{span_slices}
{span_starts}
</div>
"""

# A single token inside a span: placeholders {text}, {span_slices}, {span_starts}.
TPL_SPAN = """
<span style="font-weight: bold; display: inline-block; line-height: 3; padding-bottom: 12px;position: relative;">
{text}
{span_slices}
{span_starts}
</span>
"""

# The coloured bar drawn under a token: placeholders {bg}, {top_offset}.
TPL_SPAN_SLICE = """
<span style="background: {bg}; top: {top_offset}px; display: inline-block; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
</span>
"""

# The bar plus label drawn where a span starts: placeholders {bg},
# {top_offset}, {label}, {kb_link}.
TPL_SPAN_START = """
<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
<span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">

{label}{kb_link}
</span>
</span>

"""

# Right-to-left counterpart of TPL_SPAN_START; the markup shown is the same
# apart from blank lines.
TPL_SPAN_START_RTL = """
<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
<span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
{label}{kb_link}
</span>
</span>
"""

# Sample passage.
DEFAULT_TEXT = """Tickner said regardless of the result, the royal commission was a waste of money and he would proceed with a separate inquiry into the issue headed by Justice Jane Matthews. His attack came as the Aboriginal women involved in the case demanded a female minister examine the religious beliefs they claim are inherent in their fight against a bridge to the island near Goolwa in South Australia."""
48
+
resources/text_list.py ADDED
The diff for this file is too large to render. See raw diff
 
utils/__pycache__/util.cpython-39.pyc CHANGED
Binary files a/utils/__pycache__/util.cpython-39.pyc and b/utils/__pycache__/util.cpython-39.pyc differ
 
utils/util.py CHANGED
@@ -8,6 +8,7 @@ def preprocess(text):
8
  text = re.sub('\n', ' ', text)
9
  text = re.sub('\s+', " ", text)
10
  text = re.sub('&&&&&&&&#&#&#&#&', '\n\n', text)
 
11
  return text
12
 
13
 
 
8
  text = re.sub('\n', ' ', text)
9
  text = re.sub('\s+', " ", text)
10
  text = re.sub('&&&&&&&&#&#&#&#&', '\n\n', text)
11
+ text = re.sub("--- Para SEP ---", '\n', text)
12
  return text
13
 
14