from typing import List, Sequence, Tuple, Optional, Dict, Union, Callable, Any

import pandas as pd
import spacy
from spacy.language import Language

# Span attributes rendered (as strings) in the leading columns of every table.
SPAN_ATTRS = ["text", "label_", "start", "end"]


def _base_row(span, score, attrs: Sequence[str]) -> List[Any]:
    """Return the stringified ``attrs`` of ``span`` followed by ``score``."""
    # The score is appended as-is (not pre-formatted to a string).
    return [str(getattr(span, attr)) for attr in attrs] + [score]


def simple_table(
    doc: Union[spacy.tokens.Doc, Dict[str, str]],
    spans_key: str = "sc",
    attrs: Sequence[str] = SPAN_ATTRS,
) -> Tuple[List[List[Any]], List[str]]:
    """Build a flat table with one row per span in ``doc.spans[spans_key]``.

    Each row holds ``str(getattr(span, attr))`` for every name in ``attrs``,
    followed by the score paired with that span.

    Args:
        doc: Object exposing ``doc.spans[spans_key]``; that span group must be
            iterable and carry an ``attrs`` mapping with a ``"scores"`` entry.
            NOTE(review): the annotation also admits a plain dict, but the
            body only uses attribute access (``doc.spans``) - confirm whether
            dict input is really expected by callers.
        spans_key: Key of the span group inside ``doc.spans``.
        attrs: Names of the span attributes to render. Any sequence is
            accepted; it is never mutated.

    Returns:
        ``(data, columns)`` where ``data`` is a list of rows and ``columns``
        is ``attrs`` plus ``"Conf. score"``.

    Raises:
        Lookup errors from ``doc.spans[spans_key]`` or ``attrs["scores"]``
        propagate unchanged (``KeyError`` for dict-like containers).
    """
    group = doc.spans[spans_key]
    columns = list(attrs) + ["Conf. score"]
    # zip() stops at the shorter input, so spans without a score are dropped.
    data = [
        _base_row(span, score, attrs)
        for span, score in zip(group, group.attrs["scores"])
    ]
    return data, columns


def const_table(
    doc: Union[spacy.tokens.Doc, Dict[str, str]],
    spans_key: str = "sc",
    attrs: Sequence[str] = SPAN_ATTRS,
) -> Tuple[List[List[Any]], List[str]]:
    """Build a span table extended with information about each span's root.

    On top of the columns produced for ``attrs`` and the score, every row
    carries:

    * ``"span dep"``     - ``span.root.dep_``
    * ``"POS"``          - ``span.root.tag_`` (note: the ``tag_`` attribute)
    * ``"POS sequence"`` - ``tag_`` of every token in the span, joined by ``_``
    * ``"head"``         - ``span.root.head.norm_``

    Args:
        doc: Object exposing ``doc.spans[spans_key]``; that span group must be
            iterable and carry an ``attrs`` mapping with a ``"scores"`` entry.
        spans_key: Key of the span group inside ``doc.spans``.
        attrs: Names of the span attributes to render. Any sequence is
            accepted; it is never mutated.

    Returns:
        ``(data, columns)`` where ``data`` is a list of rows and ``columns``
        lists the header for each row position.

    Raises:
        Lookup errors from ``doc.spans[spans_key]`` or ``attrs["scores"]``
        propagate unchanged (``KeyError`` for dict-like containers).
    """
    group = doc.spans[spans_key]
    columns = list(attrs) + [
        "Conf. score",
        "span dep",
        "POS",
        "POS sequence",
        "head",
    ]
    data = []
    # zip() stops at the shorter input, so spans without a score are dropped.
    for span, score in zip(group, group.attrs["scores"]):
        root = span.root
        row = _base_row(span, score, attrs)
        row.extend(
            [
                root.dep_,
                root.tag_,
                "_".join([token.tag_ for token in span]),
                root.head.norm_,
            ]
        )
        data.append(row)
    return data, columns


def ngrammar(seq: list, n: int = 2) -> list:
    """Return every contiguous length-``n`` slice of ``seq``, in order.

    Works on anything that supports ``len()`` and slicing; each n-gram has
    the type that slicing ``seq`` yields (a list for a list, a str for a str).

    Args:
        seq: Sequence to slide the window over.
        n: Window size; must be at least 1.

    Returns:
        A list of ``len(seq) - n + 1`` slices, or an empty list when ``seq``
        is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1 (a non-positive window has no
            meaningful n-grams).
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")
    # range() is empty when len(seq) < n, which yields [] without a guard.
    return [seq[start:start + n] for start in range(len(seq) - n + 1)]