Update README.md
Browse files
README.md
CHANGED
@@ -10,7 +10,7 @@ metrics:
|
|
10 |
- f1
|
11 |
- accuracy
|
12 |
model-index:
|
13 |
-
- name:
|
14 |
results:
|
15 |
- task:
|
16 |
name: Token Classification
|
@@ -34,7 +34,7 @@ model-index:
|
|
34 |
value: 0.9785228256835333
|
35 |
---
|
36 |
|
37 |
-
#
|
38 |
|
39 |
This model is a fine-tuned version of [gerulata/slovakbert](https://huggingface.co/gerulata/slovakbert) on the wikiann_sk dataset.
|
40 |
It achieves the following results on the evaluation set:
|
@@ -44,17 +44,68 @@ It achieves the following results on the evaluation set:
|
|
44 |
- F1: 0.9398
|
45 |
- Accuracy: 0.9785
|
46 |
|
47 |
-
##
|
48 |
|
49 |
-
|
|
|
50 |
|
51 |
-
## Intended uses & limitations
|
52 |
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
-
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
## Training procedure
|
60 |
|
|
|
10 |
- f1
|
11 |
- accuracy
|
12 |
model-index:
|
13 |
+
- name: slovakbert-ner
|
14 |
results:
|
15 |
- task:
|
16 |
name: Token Classification
|
|
|
34 |
value: 0.9785228256835333
|
35 |
---
|
36 |
|
37 |
+
# Named Entity Recognition based on SlovakBERT
|
38 |
|
39 |
This model is a fine-tuned version of [gerulata/slovakbert](https://huggingface.co/gerulata/slovakbert) on the wikiann_sk dataset.
|
40 |
It achieves the following results on the evaluation set:
|
|
|
44 |
- F1: 0.9398
|
45 |
- Accuracy: 0.9785
|
46 |
|
47 |
+
## Intended uses & limitations
|
48 |
|
49 |
+
```python
|
50 |
+
from transformers import pipeline
|
51 |
|
|
|
52 |
|
53 |
+
ner_pipeline = pipeline(task='ner', model='crabz/slovakbert-ner')
|
54 |
+
input_sentence = "Minister financií a líder mandátovo najsilnejšieho hnutia OĽaNO Igor Matovič upozorňuje, že následky tretej vlny budú na Slovensku veľmi veľké."
|
55 |
+
classifications = ner_pipeline(input_sentence)
|
56 |
+
```
|
57 |
+
|
58 |
+
The predicted entities can be visualized with `displaCy`:
|
59 |
+
|
60 |
+
```python
|
61 |
+
import spacy
|
62 |
+
from spacy import displacy
|
63 |
+
|
64 |
+
|
65 |
+
ner_map = {0: '0', 1: 'B-OSOBA', 2: 'I-OSOBA', 3: 'B-ORGANIZÁCIA', 4: 'I-ORGANIZÁCIA', 5: 'B-LOKALITA', 6: 'I-LOKALITA'}
|
66 |
+
|
67 |
+
entities = []
|
68 |
+
for i in range(len(classifications)):
|
69 |
+
if classifications[i]['entity'] != 0:
|
70 |
+
if ner_map[classifications[i]['entity']][0] == 'B':
|
71 |
+
j = i + 1
|
72 |
+
while j < len(classifications) and ner_map[classifications[j]['entity']][0] == 'I':
|
73 |
+
j += 1
|
74 |
+
entities.append((ner_map[classifications[i]['entity']].split('-')[1], classifications[i]['start'],
|
75 |
+
classifications[j - 1]['end']))
|
76 |
+
|
77 |
+
nlp = spacy.blank("en") # it should work with any language
|
78 |
+
|
79 |
+
doc = nlp(input_sentence)
|
80 |
+
|
81 |
+
ents = []
|
82 |
+
for ee in entities:
|
83 |
+
ents.append(doc.char_span(ee[1], ee[2], ee[0]))
|
84 |
+
|
85 |
+
doc.ents = ents
|
86 |
+
|
87 |
+
options = {"ents": ["OSOBA", "ORGANIZÁCIA", "LOKALITA"],
|
88 |
+
"colors": {"OSOBA": "lightblue", "ORGANIZÁCIA": "lightcoral", "LOKALITA": "lightgreen"}}
|
89 |
+
displacy_html = displacy.render(doc, style="ent", options=options)
|
90 |
+
|
91 |
+
```
|
92 |
|
93 |
+
<div class="entities" style="line-height: 2.5; direction: ltr">Minister financií a líder mandátovo najsilnejšieho hnutia
|
94 |
+
<mark class="entity" style="background: lightcoral; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
95 |
+
OĽaNO
|
96 |
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORGANIZÁCIA</span>
|
97 |
+
</mark>
|
98 |
|
99 |
+
<mark class="entity" style="background: lightblue; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
100 |
+
Igor Matovič
|
101 |
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">OSOBA</span>
|
102 |
+
</mark>
|
103 |
+
upozorňuje, že následky tretej vlny budú na
|
104 |
+
<mark class="entity" style="background: lightgreen; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
|
105 |
+
Slovensku
|
106 |
+
<span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">LOKALITA</span>
|
107 |
+
</mark>
|
108 |
+
veľmi veľké.</div>
|
109 |
|
110 |
## Training procedure
|
111 |
|