|
|
|
from typing import List, Sequence, Tuple, Optional, Dict, Union, Callable |
|
import pandas as pd |
|
import spacy |
|
from spacy.language import Language |
|
|
|
# Span attribute names read by default when building the span tables;
# each name becomes one table column.
SPAN_ATTRS = [
    "text",
    "label_",
    "start",
    "end",
]
|
|
|
|
|
def simple_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
                 spans_key: str = "sc",
                 attrs: List[str] = SPAN_ATTRS):
    """Build table rows and column headers for one span group of ``doc``.

    Args:
        doc: Object whose ``spans[spans_key]`` group is iterated and whose
            group ``attrs['scores']`` supplies the per-span scores.
        spans_key: Key of the span group to read from ``doc.spans``.
        attrs: Names of the span attributes to read; each value is
            converted with ``str``.

    Returns:
        A ``(data, columns)`` pair. ``data`` holds one row per span: the
        stringified attribute values followed by that span's score.
        ``columns`` is ``attrs`` followed by ``"Conf. score"``.
    """
    # NOTE(review): the annotation also admits a dict, but the body uses
    # attribute access (``doc.spans``) - confirm whether dict input is
    # actually supported.
    columns = attrs + ["Conf. score"]
    group = doc.spans[spans_key]
    data = []
    # zip() pairs spans and scores by position and stops at the shorter one.
    for span, score in zip(group, group.attrs['scores']):
        row = [str(getattr(span, name)) for name in attrs]
        row.append(score)
        data.append(row)
    return data, columns
|
|
|
|
|
def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
                spans_key: str = "sc",
                attrs: List[str] = SPAN_ATTRS):
    """Build span table rows extended with details taken from each span's root.

    Args:
        doc: Object whose ``spans[spans_key]`` group is iterated and whose
            group ``attrs['scores']`` supplies the per-span scores.
        spans_key: Key of the span group to read from ``doc.spans``.
        attrs: Names of the span attributes to read; each value is
            converted with ``str``.

    Returns:
        A ``(data, columns)`` pair. Each row of ``data`` holds, in order:
        the stringified ``attrs`` values, the score, ``span.root.dep_``,
        ``span.root.tag_``, the ``tag_`` of every token in the span joined
        with ``"_"``, and ``span.root.head.norm_``. ``columns`` lists the
        matching headers.
    """
    # NOTE(review): the annotation also admits a dict, but the body uses
    # attribute access (``doc.spans``) - confirm whether dict input is
    # actually supported.
    columns = attrs + ["Conf. score", 'span dep',
                       "POS", "POS sequence", "head"]
    group = doc.spans[spans_key]
    # zip() pairs spans and scores by position and stops at the shorter one.
    data = [
        [
            *(str(getattr(span, name)) for name in attrs),
            score,
            span.root.dep_,
            span.root.tag_,
            "_".join([tok.tag_ for tok in span]),
            span.root.head.norm_,
        ]
        for span, score in zip(group, group.attrs['scores'])
    ]
    return data, columns
|
|
|
|
|
def ngrammar(seq: list, n: int = 2) -> list:
    """Return every contiguous run of ``n`` items from ``seq``, in order.

    Args:
        seq: Sliceable sequence to window over. Each n-gram is produced by
            slicing, so it has the same type as ``seq`` (list slices for a
            list, substrings for a string).
        n: Window size. Defaults to 2.

    Returns:
        A list with the ``len(seq) - n + 1`` slices of length ``n``. The
        list is empty when ``seq`` holds fewer than ``n`` items, or when
        ``n`` is less than 1 because no window of that size exists.
    """
    if n < 1:
        # A non-positive size used to produce one empty (or arbitrary)
        # slice per element; there is no meaningful n-gram in that case.
        return []
    # A negative stop (seq shorter than n) gives an empty range.
    return [seq[start:start + n] for start in range(len(seq) - n + 1)]
|
|