new pipeline
Browse files- demo.py +3 -1
- requirements.txt +2 -1
- utils/util.py +84 -7
demo.py
CHANGED
@@ -8,7 +8,7 @@ from spacy.tokens import Doc
|
|
8 |
|
9 |
import streamlit as st
|
10 |
|
11 |
-
from utils.util import delete_overlapping_span
|
12 |
from utils.visualize import visualize_spans
|
13 |
|
14 |
# nlp = spacy.load(
|
@@ -261,6 +261,7 @@ with st.form("my_form"):
|
|
261 |
# st.write(text)
|
262 |
# delete_span(doc.spans['sc'])
|
263 |
|
|
|
264 |
delete_overlapping_span(doc.spans['sc'])
|
265 |
|
266 |
visualize_spans(
|
@@ -292,6 +293,7 @@ st.subheader("Bibliography")
|
|
292 |
st.markdown("""
|
293 |
* Chang, P., & Schleppegrell, M. (2011). Taking an effective authorial stance in academic writing: Making the linguistic resources explicit for L2 writers in the social sciences. _Journal of English for Academic Purposes, 10_ (3), 140–151. https://doi.org/10.1016/j.jeap.2011.05.005
|
294 |
* Martin, J. R., & White, P. R. R. (2005). _The language of evaluation: Appraisal in English._ Palgrave Macmillan.
|
|
|
295 |
* Wu, S. M. (2007). The use of engagement resources in high- and low-rated undergraduate geography essays. _Journal of English for Academic Purposes, 6_ (3), 254–271. https://doi.org/10.1016/j.jeap.2007.09.006
|
296 |
|
297 |
""")
|
|
|
8 |
|
9 |
import streamlit as st
|
10 |
|
11 |
+
from utils.util import delete_overlapping_span, cleanup_justify
|
12 |
from utils.visualize import visualize_spans
|
13 |
|
14 |
# nlp = spacy.load(
|
|
|
261 |
# st.write(text)
|
262 |
# delete_span(doc.spans['sc'])
|
263 |
|
264 |
+
cleanup_justify(doc, doc.spans['sc'])
|
265 |
delete_overlapping_span(doc.spans['sc'])
|
266 |
|
267 |
visualize_spans(
|
|
|
293 |
st.markdown("""
|
294 |
* Chang, P., & Schleppegrell, M. (2011). Taking an effective authorial stance in academic writing: Making the linguistic resources explicit for L2 writers in the social sciences. _Journal of English for Academic Purposes, 10_ (3), 140–151. https://doi.org/10.1016/j.jeap.2011.05.005
|
295 |
* Martin, J. R., & White, P. R. R. (2005). _The language of evaluation: Appraisal in English._ Palgrave Macmillan.
|
296 |
+
* Ryshina-Pankova, M. (2014). Exploring academic argumentation in course-related blogs through ENGAGEMENT. In G. Thompson & L. Alba-Juez (Eds.), _Pragmatics & Beyond New Series (Vol. 242, pp. 281–302)_. John Benjamins Publishing Company. https://doi.org/10.1075/pbns.242.14rys
|
297 |
* Wu, S. M. (2007). The use of engagement resources in high- and low-rated undergraduate geography essays. _Journal of English for Academic Purposes, 6_ (3), 254–271. https://doi.org/10.1016/j.jeap.2007.09.006
|
298 |
|
299 |
""")
|
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ spacy_streamlit
|
|
5 |
# https://huggingface.co/egumasa/en_engagement_RoBERTa_combined/resolve/main/en_engagement_RoBERTa_combined-any-py3-none-any.whl
|
6 |
# https://huggingface.co/egumasa/en_engagement_RoBERTa_context_flz/resolve/main/en_engagement_RoBERTa_context_flz-any-py3-none-any.whl
|
7 |
# https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad_max1_do02/resolve/main/en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl
|
8 |
-
https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad/resolve/main/en_engagement_spl_RoBERTa_acad-any-py3-none-any.whl
|
|
|
|
5 |
# https://huggingface.co/egumasa/en_engagement_RoBERTa_combined/resolve/main/en_engagement_RoBERTa_combined-any-py3-none-any.whl
|
6 |
# https://huggingface.co/egumasa/en_engagement_RoBERTa_context_flz/resolve/main/en_engagement_RoBERTa_context_flz-any-py3-none-any.whl
|
7 |
# https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad_max1_do02/resolve/main/en_engagement_spl_RoBERTa_acad_max1_do02-any-py3-none-any.whl
|
8 |
+
# https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad/resolve/main/en_engagement_spl_RoBERTa_acad-any-py3-none-any.whl
|
9 |
+
https://huggingface.co/egumasa/en_engagement_spl_RoBERTa_acad2/resolve/main/en_engagement_spl_RoBERTa_acad2-any-py3-none-any.whl
|
utils/util.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import re
|
2 |
from collections import Counter
|
|
|
3 |
|
4 |
|
5 |
def preprocess(text):
|
@@ -10,6 +11,17 @@ def preprocess(text):
|
|
10 |
return text
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def delete_overlapping_span(span_sc: dict):
|
14 |
# print(span_sc)
|
15 |
start_token_list = [spn.start for spn in span_sc]
|
@@ -21,7 +33,7 @@ def delete_overlapping_span(span_sc: dict):
|
|
21 |
|
22 |
info = {}
|
23 |
for n, (spn, score) in enumerate(zip(span_sc, span_sc.attrs['scores']),
|
24 |
-
start=
|
25 |
res = {
|
26 |
'score': score,
|
27 |
'spn': spn,
|
@@ -54,10 +66,75 @@ def delete_overlapping_span(span_sc: dict):
|
|
54 |
id_del.append(n)
|
55 |
|
56 |
# print(id_comp)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
# print(idx)
|
60 |
-
try:
|
61 |
-
del span_sc[idx - n]
|
62 |
-
except IndexError:
|
63 |
-
continue
|
|
|
1 |
import re
|
2 |
from collections import Counter
|
3 |
+
from spacy.tokens import SpanGroup
|
4 |
|
5 |
|
6 |
def preprocess(text):
|
|
|
11 |
return text
|
12 |
|
13 |
|
14 |
+
def del_spans(span_sc, indexes: list):
    """Delete the entries selected by ``indexes`` from ``span_sc`` in place.

    Args:
        span_sc: Mutable, index-addressable container of spans; it must
            support ``len()`` and ``del span_sc[i]``.
        indexes: Positions to process. The caller's list is left untouched
            and repeated positions are processed only once.

    Returns:
        None. ``span_sc`` is modified in place.
    """
    # Work from the highest position down so that a deletion never shifts
    # the positions that are still waiting to be processed. ``sorted`` is
    # used instead of ``list.sort`` so the caller's list is not reordered,
    # and ``set`` prevents a repeated position from deleting a second,
    # unrelated entry.
    for idx in sorted(set(indexes), reverse=True):
        # The entry removed is addressed as ``idx + 1``; requests whose
        # ``idx + 1`` falls outside the container are skipped.
        # NOTE(review): presumably this offset compensates for how the
        # installed spaCy ``SpanGroup.__delitem__`` resolves its index --
        # confirm against the pinned spaCy version before changing it.
        if idx + 1 < len(span_sc):
            del span_sc[idx + 1]
|
23 |
+
|
24 |
+
|
25 |
def delete_overlapping_span(span_sc: dict):
|
26 |
# print(span_sc)
|
27 |
start_token_list = [spn.start for spn in span_sc]
|
|
|
33 |
|
34 |
info = {}
|
35 |
for n, (spn, score) in enumerate(zip(span_sc, span_sc.attrs['scores']),
|
36 |
+
start=0):
|
37 |
res = {
|
38 |
'score': score,
|
39 |
'spn': spn,
|
|
|
66 |
id_del.append(n)
|
67 |
|
68 |
# print(id_comp)
|
69 |
+
del_spans(span_sc, id_del)
|
70 |
+
# for n, idx in enumerate(id_del):
|
71 |
+
# # print(idx)
|
72 |
+
|
73 |
+
# try:
|
74 |
+
# del span_sc[idx - n]
|
75 |
+
# except IndexError:
|
76 |
+
# continue
|
77 |
+
|
78 |
+
|
79 |
+
def cleanup_justify(doc, span_sc: dict):
    """Rebuild the JUSTIFYING spans of ``span_sc`` around their root tokens.

    Every span labelled ``JUSTIFYING`` is indexed by its ``root`` token.
    A span whose root's head is the root of another JUSTIFYING span is
    treated as dependent and scheduled for deletion. For every other
    JUSTIFYING span a candidate is built from ``doc`` that runs from the
    root's ``left_edge`` to its ``right_edge`` (inclusive) and is labelled
    ``JUSTIFYING``; the candidate replaces the original span unless an equal
    span is already present in ``span_sc``.

    Args:
        doc: Document the spans belong to; sliced to build the candidates.
        span_sc: Span container to clean up in place. Despite the ``dict``
            annotation it is used as a span group here: it is enumerated,
            tested with ``in``, passed to ``del_spans`` and extended.

    Returns:
        None. ``span_sc`` is modified in place.
    """
    # NOTE(review): delete_overlapping_span pairs the spans with
    # span_sc.attrs['scores'] positionally; this function does not touch
    # attrs -- confirm the scores stay aligned after spans are replaced.

    # First create an index of the spans carrying the JUSTIFYING tag,
    # keyed by each span's root token.
    justifies = {}
    for idx, span in enumerate(span_sc):
        if span.label_ in ['JUSTIFYING']:
            justifies[span.root] = {
                "span": span,
                "head": span.root.head,
                "start": span.start,
                "end": span.end,
                "del": False,
                "dependent": False,
                "span_idx": idx
            }

    # Flag the spans that hang off another JUSTIFYING span. A token that is
    # its own head (spaCy's convention for a sentence root) must not count
    # as depending on itself, otherwise its span would be dropped without
    # a replacement.
    for spanroot, info in justifies.items():
        if spanroot.head.i != spanroot.i and spanroot.head in justifies:
            info['dependent'] = True
            info['del'] = True

    new_spans = []
    for spanroot, info in justifies.items():
        if info['dependent']:
            continue

        # Candidate covering the root token's whole subtree.
        new_span = doc[spanroot.left_edge.i:spanroot.right_edge.i + 1]
        new_span.label_ = "JUSTIFYING"

        if new_span not in span_sc:
            # The expanded span is new: add it and retire the original.
            new_spans.append(new_span)
            info['del'] = True
        else:
            # An equal span already exists. Retire the original only when
            # it is a different (narrower or wider) span; if the original
            # already is the expanded span, deleting it would lose it.
            info['del'] = (info['start'], info['end']) != (new_span.start,
                                                           new_span.end)

    to_delete = [
        info['span_idx'] for spanroot, info in justifies.items() if info['del']
    ]

    del_spans(span_sc, to_delete)

    span_grp = SpanGroup(doc, spans=new_spans)
    span_sc.extend(span_grp)
|
139 |
|
140 |
+
# print(justifies)
|
|
|
|
|
|
|
|
|
|