"""Gradio demo that highlights skill and knowledge spans in a sentence.

Two token-classification pipelines (JobBERT skill extraction and JobBERT
knowledge extraction) are run on the same input text, and their entity
spans are shown in two separate highlighted-text outputs.
"""
import gradio as gr
import spaces
from transformers import pipeline

token_skill_classifier = pipeline(
    model="jjzha/jobbert_skill_extraction",
    aggregation_strategy="first",
)
token_knowledge_classifier = pipeline(
    model="jjzha/jobbert_knowledge_extraction",
    aggregation_strategy="first",
)

# NOTE(review): confirm that the installed transformers version exposes
# ``.to`` on pipeline objects; ``pipeline(..., device=...)` is the documented
# way to select a device if it does not.
token_skill_classifier.to("cuda")
token_knowledge_classifier.to("cuda")

examples = [
    "Knowing Python is a plus",
    "Recommend changes, develop and implement processes to ensure compliance with IFRS standards",
]


def aggregate_span(results):
    """Merge consecutive entity spans that are exactly one character apart.

    A span whose ``"start"`` equals the previous span's ``"end"`` plus one is
    folded into the previous span: the words are joined with a single space
    and the ``"end"`` offset is extended. All other keys keep the value of
    the first span in each merged run.

    Args:
        results: Sequence of dicts with at least ``"start"``, ``"end"`` and
            ``"word"`` keys, in the order they should be scanned.

    Returns:
        A new list of merged span dicts. The input dicts are not modified.
        An empty input yields an empty list.
    """
    if not results:
        return []

    merged = []
    # Work on shallow copies so the caller's dicts are left untouched.
    current = dict(results[0])
    for result in results[1:]:
        if result["start"] == current["end"] + 1:
            current["word"] += " " + result["word"]
            current["end"] = result["end"]
        else:
            merged.append(current)
            current = dict(result)
    merged.append(current)
    return merged


def _relabel(results, label):
    """Replace each truthy ``"entity_group"`` entry with ``"entity": label``."""
    for result in results:
        if result.get("entity_group"):
            result["entity"] = label
            del result["entity_group"]
    return results


# NOTE(review): presumably requests GPU hardware for the duration of the call
# on Hugging Face Spaces; confirm against the ``spaces`` package docs.
@spaces.GPU
def ner(text):
    """Extract skill and knowledge spans from ``text``.

    Args:
        text: The sentence to analyse.

    Returns:
        A pair of dicts ``{"text": ..., "entities": [...]}``: the first holds
        the spans labelled ``"Skill"``, the second the spans labelled
        ``"Knowledge"``. Adjacent spans are merged by ``aggregate_span``.
    """
    output_skills = aggregate_span(
        _relabel(token_skill_classifier(text), "Skill")
    )
    output_knowledge = aggregate_span(
        _relabel(token_knowledge_classifier(text), "Knowledge")
    )
    return (
        {"text": text, "entities": output_skills},
        {"text": text, "entities": output_knowledge},
    )


demo = gr.Interface(
    fn=ner,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["highlight", "highlight"],
    examples=examples,
)

demo.launch()