kaushikbar committed on
Commit
33a2a6e
·
1 Parent(s): 83f2778

Used fasttext for language detection

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import datetime
2
  import gradio as gr
 
3
  from langdetect import detect, DetectorFactory, detect_langs
 
4
  from transformers import pipeline
5
 
6
  models = {'en': 'Narsil/deberta-large-mnli-zero-cls', # English
@@ -34,6 +36,8 @@ classifiers = {'en': pipeline("zero-shot-classification", hypothesis_template=hy
34
  'no': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['no'],
35
  model=models['no'])}
36
 
 
 
37
  def prep_examples():
38
  example_text1 = "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most \
39
  people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment. \
@@ -100,8 +104,10 @@ def detect_lang(sequence, labels):
100
  seq_lang = 'en'
101
 
102
  try:
103
- seq_lang = detect(sequence)
104
- lbl_lang = detect(labels)
 
 
105
  except:
106
  print("Language detection failed!",
107
  "Date:{}, Sequence:{}, Labels:{}".format(
 
1
  import datetime
2
  import gradio as gr
3
+ from huggingface_hub import hf_hub_download
4
  from langdetect import detect, DetectorFactory, detect_langs
5
+ import fasttext
6
  from transformers import pipeline
7
 
8
  models = {'en': 'Narsil/deberta-large-mnli-zero-cls', # English
 
36
  'no': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['no'],
37
  model=models['no'])}
38
 
39
+ fasttext_model = fasttext.load_model(hf_hub_download("julien-c/fasttext-language-id", "model.bin"))
40
+
41
  def prep_examples():
42
  example_text1 = "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most \
43
  people who fall sick with COVID-19 will experience mild to moderate symptoms and recover without special treatment. \
 
104
  seq_lang = 'en'
105
 
106
  try:
107
+ # seq_lang = detect(sequence)
108
+ # lbl_lang = detect(labels)
109
+ seq_lang = fasttext_model.predict(sequence)[0][0].split("__label__")[1]
110
+ lbl_lang = fasttext_model.predict(labels)[0][0].split("__label__")[1]
111
  except:
112
  print("Language detection failed!",
113
  "Date:{}, Sequence:{}, Labels:{}".format(