File size: 1,601 Bytes
a937724
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

from typing import List, Sequence, Tuple, Optional, Dict, Union, Callable
import pandas as pd
import spacy
from spacy.language import Language

# Default span attribute names: the table builders in this module read each
# one off a span with getattr() and reuse the names as leading column headers.
SPAN_ATTRS = ["text", "label_", "start", "end"]


def simple_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
                 spans_key: str = "sc",
                 attrs: List[str] = SPAN_ATTRS):
    """Build table rows and headers for the spans stored under ``spans_key``.

    Args:
        doc: Object exposing ``doc.spans[spans_key]``. That group is iterated
            for spans and its ``attrs['scores']`` entry is paired with them.
            NOTE(review): the annotation also admits a dict, but the body
            only uses attribute access (``doc.spans``) -- confirm whether
            dict input is still expected.
        spans_key: Key used to look up the span group in ``doc.spans``.
        attrs: Attribute names read from every span via ``getattr`` and
            converted with ``str``.

    Returns:
        A ``(data, columns)`` tuple. ``data`` holds one row per span/score
        pair: the stringified attributes followed by the score as stored.
        ``columns`` is ``attrs`` followed by ``"Conf. score"``.
    """
    header = attrs + ["Conf. score"]

    span_group = doc.spans[spans_key]
    scores = span_group.attrs['scores']

    rows = []
    # zip() stops at the shorter of the two, so unmatched spans/scores are
    # silently left out of the table.
    for span, score in zip(span_group, scores):
        row = [str(getattr(span, name)) for name in attrs]
        row.append(score)
        rows.append(row)

    return rows, header


def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
                spans_key: str = "sc",
                attrs: List[str] = SPAN_ATTRS):
    """Build table rows and headers for spans, with extra root-token columns.

    Args:
        doc: Object exposing ``doc.spans[spans_key]``. That group is iterated
            for spans and its ``attrs['scores']`` entry is paired with them.
            NOTE(review): the annotation also admits a dict, but the body
            only uses attribute access (``doc.spans``) -- confirm whether
            dict input is still expected.
        spans_key: Key used to look up the span group in ``doc.spans``.
        attrs: Attribute names read from every span via ``getattr`` and
            converted with ``str``.

    Returns:
        A ``(data, columns)`` tuple. Each row in ``data`` is the stringified
        attributes followed by: the score as stored, ``span.root.dep_``,
        ``span.root.tag_``, the ``tag_`` of every token in the span joined
        with ``"_"``, and ``span.root.head.norm_``. ``columns`` is ``attrs``
        followed by the five matching header labels.
    """
    header = attrs + ["Conf. score", 'span dep',
                      "POS", "POS sequence", "head"]

    span_group = doc.spans[spans_key]
    rows = []

    # zip() stops at the shorter of the two, so unmatched spans/scores are
    # silently left out of the table.
    for span, score in zip(span_group, span_group.attrs['scores']):
        root = span.root
        base_cells = [str(getattr(span, name)) for name in attrs]
        rows.append(
            base_cells
            + [
                score,
                root.dep_,
                # The "POS" columns are filled from ``tag_``, not ``pos_``.
                root.tag_,
                "_".join(token.tag_ for token in span),
                root.head.norm_,
            ]
        )

    return rows, header


def ngrammar(seq: list, n=2):
    """Return every contiguous slice of ``seq`` that spans ``n`` positions.

    Args:
        seq: Sequence to slide over; it must support ``len()`` and slicing.
        n: Window width (defaults to 2).

    Returns:
        A list of slices ``seq[i:i + n]``, one per start index ``i`` for
        which ``i + n <= len(seq)``, in order. The list is empty when ``seq``
        is shorter than ``n``.
    """
    total = len(seq)
    # The guard keeps only windows that fit entirely inside the sequence.
    return [
        seq[start:start + n]
        for start in range(total)
        if start + n <= total
    ]