Dabs committed on
Commit
acebe59
·
1 Parent(s): b5b6090

initial commit

Browse files
Files changed (2) hide show
  1. app.py +96 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from transformers import AutoTokenizer, PreTrainedTokenizerFast
4
+ from transformers import AutoModelForTokenClassification
5
+ from transformers import pipeline
6
+ import spacy
7
+ from spacy import displacy
8
+ from spacy.tokens import Span
9
+
10
+
11
# ============ INPUT =================
# Background colour used for each entity label, both in the displaCy
# rendering and in the legend shown under the interface.
colors = {
    "LOC": "#ff5e5e",
    "MISC": "#ff9999",
    "ORG": "#ffd699",
    "PER": "#80c5c5",
}
model_name = "mrm8488/bert-spanish-cased-finetuned-ner"
spacy_model_name = "es_core_news_sm"

# spaCy is only needed so displaCy can render the entities. Download the
# model only when it is missing, instead of shelling out on every start-up
# to whichever "python" happens to be first on PATH and ignoring failures.
try:
    nlp = spacy.load(spacy_model_name)
except OSError:
    spacy.cli.download(spacy_model_name)
    nlp = spacy.load(spacy_model_name)
nlp.disable_pipes("ner")

# ============ Footer, title, descriptions and examples ===============
# Legend: one coloured badge per entity label.
article = (
    "<div> Entidades nombradas: "
    + "".join(
        f"<span style='color:#000;background: {colors[clase]}; font-size: 0.8em; font-weight: bold; line-height: 2.5; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5em;;padding:0.5em;'>{clase}</span>"
        for clase in colors
    )
    + "</div>"
)

title = "NER en español"
description = "Esta aplicación es para detección de entidades nombradas en Español"
examples = ["Hola me llamo David Betancur y vivo en Madrid"]

# =============== Model ===============

model = AutoModelForTokenClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
ner_pipe = pipeline("ner", model=model, tokenizer=tokenizer)
39
+
40
+ # =============== Funcion ===============
41
def ner(input_text):
    """Render the named entities found in ``input_text`` as displaCy HTML.

    The entities are detected by the transformer pipeline ``ner_pipe``; the
    spaCy ``Doc`` built by ``nlp`` only serves as the container that
    ``displacy`` draws from.

    Args:
        input_text: Text to analyse.

    Returns:
        The markup produced by ``displacy.render`` with ``style="ent"``.
    """
    entities = ner_pipe(input_text, aggregation_strategy="first")
    doc = nlp(input_text)

    spans = list(doc.ents)
    unaligned = []
    for entity in entities:
        span = doc.char_span(
            entity["start"], entity["end"], label=entity["entity_group"]
        )
        if span is not None:
            spans.append(span)
        else:
            # char_span returns None when the character offsets do not fall
            # on spaCy token boundaries; keep the piece for the merge below.
            unaligned.append(entity)

    # Consecutive unaligned pieces that share a label and touch each other
    # (one ends where the next starts) are glued into a single range.
    merged = []
    for entity in unaligned:
        previous = merged[-1] if merged else None
        if (
            previous is not None
            and previous["entity_group"] == entity["entity_group"]
            and previous["end"] == entity["start"]
        ):
            previous["end"] = entity["end"]
        else:
            merged.append(
                {
                    "entity_group": entity["entity_group"],
                    "start": entity["start"],
                    "end": entity["end"],
                }
            )

    for piece in merged:
        # Snap to the enclosing tokens so a merged range that still does not
        # align is shown instead of yielding None (which used to be appended
        # to doc.ents and raised).
        span = doc.char_span(
            piece["start"],
            piece["end"],
            label=piece["entity_group"],
            alignment_mode="expand",
        )
        if span is not None:
            spans.append(span)

    # doc.ents rejects overlapping spans; drop overlaps before assigning.
    doc.ents = spacy.util.filter_spans(spans)

    options = {"ents": list(colors), "colors": colors}
    return displacy.render(doc, style="ent", options=options)
82
+
83
+ # ===============Interfaz ===============
84
# Input and output widgets for the demo.
text_input = gr.inputs.Textbox(
    placeholder="Insertar el texto para analizar",
    lines=10,
)
html_output = gr.outputs.HTML()

# Wire the `ner` function to the widgets; screenshots and flagging are off.
interface_options = dict(
    fn=ner,
    inputs=text_input,
    outputs=html_output,
    examples=examples,
    title=title,
    description=description,
    article=article,
    allow_screenshot=False,
    allow_flagging=False,
)
interface = gr.Interface(**interface_options)

interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ transformers==4.16.2
2
+ torch==1.10.2
3
+ spacy==3.2.3