Spaces:
Running
Running
File size: 1,528 Bytes
91eaff6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
`spaCy-entity-linker` demo from
<https://github.com/egerber/spaCy-entity-linker/issues/18>
"""
from icecream import ic # pylint: disable=E0401
import spacy # pylint: disable=E0401
import spacy_entity_linker as sel # pylint: disable=E0401
def link_wikidata(
    doc: spacy.tokens.doc.Doc,
) -> None:
    """
    Run an entity linking classifier for wikidata.

    For every entity span in `doc.ents` this prints the span text and
    label, collects the candidate entities for that span, and -- when
    at least one candidate exists -- lets the classifier pick one and
    prints its attributes plus up to 10 sub-entities and up to 10
    super-entities.

    All output goes through `print()` and `ic()`; nothing is returned.

    doc: a parsed spaCy document whose `ents` are to be linked
    """
    # one classifier instance is shared across all entities in the doc
    classifier: sel.EntityClassifier.EntityClassifier = (
        sel.EntityClassifier.EntityClassifier()
    )

    for ent in doc.ents:
        # blank line to visually separate the report for each entity
        print()
        ic(ent.text, ent.label_)

        # build a term (a simple span) then identify all
        # the candidate entities for it
        term: sel.TermCandidate.TermCandidate = sel.TermCandidate.TermCandidate(ent)
        candidates: sel.EntityCandidates.EntityCandidates = term.get_entity_candidates()
        ic(candidates)

        # explicit length check: only `len()` support is relied upon here
        if len(candidates) > 0:
            # select the best candidate
            entity: sel.EntityElement.EntityElement = classifier(candidates)
            ic(entity.__dict__)
            ic(entity.get_sub_entities(limit=10))
            ic(entity.get_super_entities(limit=10))
if __name__ == "__main__":
    # sample passage; surrounding newlines are trimmed before parsing
    SRC_TEXT: str = """
Werner Herzog is a remarkable filmmaker and an intellectual originally from Germany, the son of Dietrich Herzog.
After the war, Werner fled to America to become famous.
"""

    # load the language model, parse the trimmed passage, and hand the
    # resulting document straight to the linker demo
    nlp: spacy.Language = spacy.load("en_core_web_sm")
    link_wikidata(nlp(SRC_TEXT.strip()))
|