Spaces:

dt
/

ner_spanish

Runtime error

App Files Files Community

Dabs committed on Mar 1, 2022

Commit

acebe59

1 Parent(s): b5b6090

initial commit

Browse files

Files changed (2) hide show

app.py +96 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import os
+import gradio as gr
+from transformers import AutoTokenizer, PreTrainedTokenizerFast
+from transformers import AutoModelForTokenClassification
+from transformers import pipeline
+import spacy
+from spacy import displacy
+from spacy.tokens import Span
+# ============ INPUT =================
+os.system("python -m spacy download es_core_news_sm")
+colors = {
+    "LOC": "#ff5e5e",
+    "MISC": "#ff9999",
+    "ORG": "#ffd699",
+    "PER": "#80c5c5",
+}
+model_name = "mrm8488/bert-spanish-cased-finetuned-ner"
+nlp = spacy.load("es_core_news_sm") #Esto es para usar displacy y renderizar las entidades
+nlp.disable_pipes("ner")
+# ============ Footer, titulo, descripciones y ejemplos ===============
+article = "<div> Entidades nombradas: "
+for clase in colors:
+    article += f"<span style='color:#000;background: {colors[clase]}; font-size: 0.8em; font-weight: bold; line-height: 2.5; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5em;;padding:0.5em;'>{clase}</span>"
+article += "</div>"
+title = "NER en español"
+description = "Esta aplicación es para detección de entidades nombradas en Español"
+examples = ["Hola me llamo David Betancur y vivo en Madrid"]
+# =============== Modelo ===============
+model = AutoModelForTokenClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+ner_pipe = pipeline("ner", model=model, tokenizer=tokenizer)
+# =============== Funcion ===============
+def ner(input_text):
+  entities = ner_pipe(input_text, aggregation_strategy="first")
+  doc = nlp(input_text)
+  potential_entities = []
+  for entity in entities:
+    start = entity["start"]
+    end = entity["end"]
+    label = entity["entity_group"]
+    ent = doc.char_span(start, end, label=label)
+    if ent != None:
+      doc.ents += (ent,)
+    else:
+      potential_entities.append(entity)
+  potential_entities.append({"entity_group": "NONE", "start": -1, "end": -1})
+  start = potential_entities[0]["start"]
+  end = potential_entities[0]["end"]
+  label = potential_entities[0]["entity_group"]
+  for item in potential_entities:
+    if item["entity_group"] == label and item["start"] == end:
+      end = item["end"]
+      continue
+    else:
+      if item["start"] != start:
+        ent = doc.char_span(start, end, label=label)
+        doc.ents += (ent,)
+      start = item["start"]
+      end = item["end"]
+      label = item["entity_group"]
+  options = {"ents": colors.keys(), "colors": colors}
+  output = displacy.render(doc, style="ent", options=options)
+  return output
+# ===============Interfaz ===============
+interface = gr.Interface(
+    title=title,
+    description=description,
+    article=article,
+    allow_screenshot=False,
+    allow_flagging=False,
+    fn=ner,
+    inputs=gr.inputs.Textbox(placeholder="Insertar el texto para analizar", lines=10),
+    outputs=gr.outputs.HTML(),
+    examples=examples
+    )
+interface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+transformers==4.16.2
+torch==1.10.2
+spacy==3.2.3