Spaces:
Runtime error
Runtime error
initial commit
Browse files- app.py +96 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gradio as gr
|
3 |
+
from transformers import AutoTokenizer, PreTrainedTokenizerFast
|
4 |
+
from transformers import AutoModelForTokenClassification
|
5 |
+
from transformers import pipeline
|
6 |
+
import spacy
|
7 |
+
from spacy import displacy
|
8 |
+
from spacy.tokens import Span
|
9 |
+
|
10 |
+
|
11 |
+
# ============ INPUT =================
# spaCy pipeline used for tokenisation and for rendering with displacy.
spacy_model_name = "es_core_news_sm"

# Download the spaCy model only when it is not installed yet. Going through
# spaCy's own downloader (instead of shelling out to whatever "python" is on
# PATH) installs into the running interpreter and does not silently ignore a
# failed download.
if not spacy.util.is_package(spacy_model_name):
    spacy.cli.download(spacy_model_name)

# Colour associated with each entity label.
colors = {
    "LOC": "#ff5e5e",
    "MISC": "#ff9999",
    "ORG": "#ffd699",
    "PER": "#80c5c5",
}
# Identifier of the pretrained token-classification model.
model_name = "mrm8488/bert-spanish-cased-finetuned-ner"

# This pipeline is here so displacy can be used to render the entities.
nlp = spacy.load(spacy_model_name)
# spaCy's own entity recogniser is switched off.
nlp.disable_pipes("ner")
|
23 |
+
|
24 |
+
# ============ Footer, title, descriptions and examples ===============
# The footer is a legend: one coloured badge per entity label in `colors`.
article = (
    "<div> Entidades nombradas: "
    + "".join(
        f"<span style='color:#000;background: {shade}; font-size: 0.8em; font-weight: bold; line-height: 2.5; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5em;;padding:0.5em;'>{tag}</span>"
        for tag, shade in colors.items()
    )
    + "</div>"
)

# Texts shown by the interface.
title = "NER en español"
description = "Esta aplicación es para detección de entidades nombradas en Español"
# Sample sentence offered as a ready-made example.
examples = ["Hola me llamo David Betancur y vivo en Madrid"]
|
33 |
+
|
34 |
+
# =============== Model ===============

# Tokenizer and token-classification weights both come from `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# NER pipeline wrapping the model and tokenizer loaded above.
ner_pipe = pipeline(task="ner", model=model, tokenizer=tokenizer)
|
39 |
+
|
40 |
+
# =============== Funcion ===============
|
41 |
+
def _merge_contiguous_fragments(fragments):
    """Join neighbouring same-label fragments into (start, end, label) tuples."""
    merged = []
    for fragment in fragments:
        start = fragment["start"]
        end = fragment["end"]
        label = fragment["entity_group"]
        if merged and merged[-1][2] == label and merged[-1][1] == start:
            # Same label and it begins exactly where the previous piece
            # stopped: extend the previous piece instead of adding a new one.
            merged[-1] = (merged[-1][0], end, label)
        else:
            merged.append((start, end, label))
    return merged


def ner(input_text):
    """Detect named entities in *input_text* and render them with displacy.

    The text is run through ``ner_pipe`` and through the spaCy pipeline
    ``nlp``. Every predicted entity is mapped onto the spaCy document with
    ``Doc.char_span``. Entities whose character offsets do not line up with
    spaCy token boundaries are merged with directly adjacent entities of the
    same label and mapped again.

    Args:
        input_text: Text to analyse.

    Returns:
        The markup produced by ``displacy.render`` for the annotated document.
    """
    entities = ner_pipe(input_text, aggregation_strategy="first")
    doc = nlp(input_text)

    spans = []
    unaligned = []
    for entity in entities:
        span = doc.char_span(
            entity["start"], entity["end"], label=entity["entity_group"]
        )
        if span is not None:
            spans.append(span)
        else:
            # char_span returns None when the offsets do not match token
            # boundaries; keep the entity for the merge pass below.
            unaligned.append(entity)

    for start, end, label in _merge_contiguous_fragments(unaligned):
        span = doc.char_span(start, end, label=label)
        # A merged fragment can still fail to align. Skip it rather than
        # putting None into doc.ents, which would fail the whole request.
        if span is not None:
            spans.append(span)

    doc.ents = tuple(doc.ents) + tuple(spans)

    options = {"ents": list(colors), "colors": colors}
    return displacy.render(doc, style="ent", options=options)
|
82 |
+
|
83 |
+
# =============== Interface ===============
# NOTE(review): gr.inputs / gr.outputs and allow_screenshot look like legacy
# Gradio APIs; confirm they exist in the Gradio version this app runs on.
interface = gr.Interface(
    fn=ner,
    inputs=gr.inputs.Textbox(lines=10, placeholder="Insertar el texto para analizar"),
    outputs=gr.outputs.HTML(),
    examples=examples,
    title=title,
    description=description,
    article=article,
    allow_flagging=False,
    allow_screenshot=False,
)

# Start the web app.
interface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
transformers==4.16.2
|
2 |
+
torch==1.10.2
|
3 |
+
spacy==3.2.3
|