srl-for-russian / app.py
Daniil Larionov
removed caching
9c1813e unverified
raw
history blame
1.47 kB
import streamlit as st
import transformers as tr
import spacy as sp
def load_pipeline(name: str):
    """Build a token-classification pipeline for the given model.

    Args:
        name: Model identifier forwarded as ``model`` to ``tr.pipeline``.

    Returns:
        The object returned by ``tr.pipeline`` for the
        ``'token-classification'`` task.
    """
    task = 'token-classification'
    token_classifier = tr.pipeline(task, model=name)
    return token_classifier
# Module-level tagger instance; the UI code further down calls it directly
# on the sentence typed by the user.
pipeline = load_pipeline(name='Rexhaif/rubert-base-srl-seqlabeling')
def convert_to_spacy(text, result):
    """Convert token-classification output to displaCy's manual ``ent`` input.

    Word-piece continuation tokens (``word`` starting with ``"##"``) are
    folded into the preceding entity by extending its ``end`` offset, but
    only when the piece starts exactly where that entity ends. A
    continuation piece without such an adjacent predecessor (for example
    because the head piece is absent from ``result``) opens an entity of its
    own, instead of raising ``IndexError`` on an empty list or stretching an
    unrelated earlier span across the text in between.

    Args:
        text: The sentence that was tagged.
        result: Iterable of dicts, each providing the keys ``word``,
            ``start``, ``end`` and ``entity``.

    Returns:
        A dict with the keys ``text`` (the input sentence), ``title``
        (always ``None``) and ``ents`` (a list of
        ``{'start', 'end', 'label'}`` dicts in input order, with any
        ``"B-"`` removed from the label).
    """
    ents = []
    for res in result:
        is_continuation = res['word'].startswith("##")
        # Only glue a continuation piece onto the entity it directly follows.
        if is_continuation and ents and ents[-1]['end'] == res['start']:
            ents[-1]['end'] = res['end']
            continue
        ents.append({
            'start': res['start'],
            'end': res['end'],
            'label': res['entity'].replace("B-", ""),
        })
    return {
        'text': text,
        'title': None,
        'ents': ents,
    }
# Highlight colour (hex string) for every label the visualisation displays.
colors = {
    'PREDICATE': "#80bdff",
    'КАУЗАТИВ': "#73ffbe",
    'КАУЗАТОР': "#ff5b5e",
    'ЭКСПЕРИЕНЦЕР': "#efff42",
    'ДРУГОЕ': "#924fff",
    'ИНСТРУМЕНТ': "#28fff1",
}

# Options handed to the displaCy renderer: the labels to show (every key of
# ``colors``, in definition order) and the label -> colour mapping itself.
options = dict(ents=[*colors], colors=colors)
# Page heading and a short usage hint.
st.title("Semantic Role Labeling for Russian Language")
st.header("Type your sentence to see predicate, arguments and their roles")

# Sentence to analyse; the second argument is the pre-filled value.
text = st.text_input('Sentence', 'представители силовых ведомств удивлены такой наглости')

# Tag the sentence and reshape the predictions into displaCy's manual format.
result = pipeline(text)
displacy_input = convert_to_spacy(text, result=result)

# Render the entity view to an HTML string (manual mode, not in a notebook).
render_kwargs = dict(style='ent', manual=True, options=options, jupyter=False)
html = sp.displacy.render(displacy_input, **render_kwargs)

# The rendered markup is raw HTML, so it is passed through unescaped.
st.markdown(html, unsafe_allow_html=True)