import re
import string

import fasttext
import gradio as gr
import numpy as np
from huggingface_hub import hf_hub_download


def load_GlotLID(repo_id: str = "cis-lmu/glotlid", filename: str = "model_v3.bin"):
    """Download a GlotLID fastText model from the Hugging Face Hub and load it.

    Args:
        repo_id: Hub repository that hosts the model file.
        filename: Name of the model binary inside that repository.

    Returns:
        The object returned by ``fasttext.load_model`` for the downloaded file.
    """
    # hf_hub_download returns a local filesystem path to the requested file.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    return fasttext.load_model(model_path)


# Load the model once at import time so every request reuses the same instance.
model = load_GlotLID()


# Punctuation marks and digits that are blanked out before classification.
# Built once at import time rather than on every call.
_NOISE_TABLE = str.maketrans(dict.fromkeys(':•#{|}' + string.digits, ' '))
# Any run of whitespace (including newlines and tabs) collapses to one space.
_WHITESPACE_RUN = re.compile(r'\s+')


def preprocess_text(text):
    """Clean raw input text into a single whitespace-normalized line.

    The characters ``: • # { | }`` and all ASCII digits are replaced by
    spaces, every run of whitespace (newlines included) is collapsed to one
    space, and leading/trailing whitespace is removed.

    Args:
        text: The raw input string. ``None`` or an empty string is accepted.

    Returns:
        The cleaned string; ``""`` when ``text`` is ``None``, empty, or
        contains nothing but stripped characters and whitespace.
    """
    if not text:
        # None or "" — nothing to clean, and None has no str methods.
        return ""

    without_noise = text.translate(_NOISE_TABLE)
    return _WHITESPACE_RUN.sub(' ', without_noise).strip()


def compute(sentence):
    """Return the top three language predictions for ``sentence`` as text.

    The input is cleaned with ``preprocess_text`` and passed to the
    module-level ``model`` with ``k=3``.

    Args:
        sentence: The raw text entered by the user.

    Returns:
        One line per prediction in the form ``"<label>: <score>"``, with the
        score formatted to four decimal places, joined by newlines. An empty
        string is returned when nothing is left to classify after cleaning.
    """
    sentence = preprocess_text(sentence)
    if not sentence:
        # Nothing but stripped characters/whitespace: skip the model call
        # instead of asking it to classify an empty line.
        return ""

    labels, probs = model.predict(sentence, k=3)
    probs = np.asarray(probs)

    # Keep only the text after the last "__" in each label
    # (presumably fastText-style "__label__<code>" — confirm against the model).
    return "\n".join(
        f"{label.rpartition('__')[-1]}: {score:.4f}"
        for label, score in zip(labels, probs)
    )


# Gradio UI: one text input wired to compute(), one text output for the result.
iface = gr.Interface(
    fn=compute,
    inputs=gr.Textbox(label="Enter a sentence"),
    outputs=gr.Textbox(label="Top 3 Language Predictions"),
    title="GlotLID: Language Identification (v3)",
    description="This app uses GlotLID v3 to identify the top 3 most likely languages for the input text.",
)

# Launch the app as soon as the script is executed.
iface.launch()