kaushikbar committed on
Commit
8010151
·
1 Parent(s): 92fe433

Used fasttext for language detection

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -36,7 +36,7 @@ classifiers = {'en': pipeline("zero-shot-classification", hypothesis_template=hy
36
  'no': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['no'],
37
  model=models['no'])}
38
 
39
- fasttext_model = fasttext.load_model(hf_hub_download("julien-c/fasttext-language-id", "model.bin"))
40
 
41
  def prep_examples():
42
  example_text1 = "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most \
@@ -104,10 +104,10 @@ def detect_lang(sequence, labels):
104
  seq_lang = 'en'
105
 
106
  try:
107
- # seq_lang = detect(sequence)
108
- # lbl_lang = detect(labels)
109
- seq_lang = fasttext_model.predict(sequence)[0][0].split("__label__")[1]
110
- lbl_lang = fasttext_model.predict(labels)[0][0].split("__label__")[1]
111
  except:
112
  print("Language detection failed!",
113
  "Date:{}, Sequence:{}, Labels:{}".format(
 
36
  'no': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['no'],
37
  model=models['no'])}
38
 
39
+ fasttext_model = fasttext.load_model(hf_hub_download("julien-c/fasttext-language-id", "lid.176.bin"))
40
 
41
  def prep_examples():
42
  example_text1 = "Coronavirus disease (COVID-19) is an infectious disease caused by the SARS-CoV-2 virus. Most \
 
104
  seq_lang = 'en'
105
 
106
  try:
107
+ #seq_lang = detect(sequence)
108
+ #lbl_lang = detect(labels)
109
+ seq_lang = fasttext_model.predict(sequence, k=1)[0][0].split("__label__")[1]
110
+ lbl_lang = fasttext_model.predict(labels, k=1)[0][0].split("__label__")[1]
111
  except:
112
  print("Language detection failed!",
113
  "Date:{}, Sequence:{}, Labels:{}".format(