Varosa committed on
Commit
0ad5b35
·
1 Parent(s): 9be6ad2

uploaded all files

Browse files
Files changed (3) hide show
  1. app.py +67 -0
  2. requirements.txt +6 -0
  3. utils.py +55 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
4
+ from utils import lang_ids
5
+ import nltk
6
# Sentence-tokenizer data needed by nltk.tokenize.sent_tokenize().
# NLTK 3.8.2 and later read the "punkt_tab" resource, older releases read
# "punkt". requirements.txt does not pin nltk, so request both; a release that
# does not know one of the names is expected to report the failed download and
# carry on rather than raise (nltk.download() returns False in that case).
for _resource in ("punkt", "punkt_tab"):
    nltk.download(_resource)

# Hub checkpoint loaded by the speech-recognition pipeline below.
MODEL_NAME = "Pranjal12345/pranjal_whisper_medium"
# Passed as batch_size on every pipeline call in translate_audio().
BATCH_SIZE = 8
# NOTE(review): not referenced anywhere in the visible code -- confirm whether
# an upload-size check was intended.
FILE_LIMIT_MB = 1000

# Built once at import time and shared by every request.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device='cpu',
)

# Display names offered in the target-language dropdown, in lang_ids order.
lang_list = list(lang_ids.keys())
20
+
21
# Hub checkpoint used to translate the English transcript into other languages.
_MBART_CHECKPOINT = "sanjitaa/mbart-many-to-many"
# Populated on first use so the checkpoint is loaded once per process instead
# of once per request.
_mbart_cache = {}


def _load_mbart():
    """Return the ``(model, tokenizer)`` pair, loading the checkpoint on first use."""
    if not _mbart_cache:
        # Load both into locals first so a failure on the second load does not
        # leave a half-filled cache behind.
        model = MBartForConditionalGeneration.from_pretrained(_MBART_CHECKPOINT)
        tokenizer = MBart50TokenizerFast.from_pretrained(_MBART_CHECKPOINT)
        _mbart_cache.update(model=model, tokenizer=tokenizer)
    return _mbart_cache["model"], _mbart_cache["tokenizer"]


def translate_audio(inputs, target_language):
    """Turn an audio file into text in the requested language.

    The audio is first run through the speech-recognition pipeline with the
    ``translate`` task. When the target is ``'English'`` that text is returned
    unchanged; otherwise it is split into sentences and each sentence is
    translated from English (``en_XX``) with the mBART model.

    Args:
        inputs: Audio handed to the speech-recognition pipeline (the UI is
            configured to pass a file path), or ``None`` when nothing was
            uploaded.
        target_language: Display name of the target language; must be a key
            of ``lang_ids``.

    Returns:
        The resulting text as a single string.

    Raises:
        gr.Error: If no audio was submitted or the target language is not a
            key of ``lang_ids``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload an audio file before submitting your request.")

    # Resolve the language before the (slow) recognition pass so that a bad
    # selection fails immediately and with a readable message instead of a
    # raw KeyError after the audio has already been processed.
    try:
        target_lang = lang_ids[target_language]
    except (KeyError, TypeError):
        raise gr.Error(f"Unsupported target language: {target_language!r}") from None

    text = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "translate"},
        return_timestamps=True,
    )["text"]

    if target_language == 'English':
        return text

    model, tokenizer = _load_mbart()
    tokenizer.src_lang = "en_XX"
    # Loop-invariant: the same target-language token starts every generation.
    target_token_id = tokenizer.lang_code_to_id[target_lang]

    translated_sentences = []
    # Translate sentence by sentence so each model input stays short.
    for sentence in nltk.tokenize.sent_tokenize(text):
        encoded_sentence = tokenizer(sentence, return_tensors="pt")
        generated_tokens = model.generate(
            **encoded_sentence,
            forced_bos_token_id=target_token_id,
        )
        decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
        translated_sentences.append(decoded[0])

    # Join with a space: plain concatenation glued the end of one sentence to
    # the start of the next.
    return " ".join(translated_sentences)
50
+
51
# Input widgets, in the positional order translate_audio() receives them.
inputs = [
    # Use the top-level component, matching gr.Dropdown below. The legacy
    # gr.inputs.* wrappers are deprecated in Gradio 3 and no longer exist in
    # Gradio 4. The source argument is left at its default because its name
    # differs between the two major versions ("source" vs "sources"); file
    # upload is available by default on both.
    # NOTE(review): on Gradio 4 the default also offers microphone recording
    # -- confirm that is acceptable for this app.
    gr.Audio(type="filepath", label="Audio file"),
    gr.Dropdown(lang_list, value="English", label="Target Language"),
]
description = "Audio translation"

# Single-function UI: audio + target language in, translated text out.
translation_interface = gr.Interface(
    fn=translate_audio,
    inputs=inputs,
    outputs="text",
    title="Speech Translation",
    description=description,
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    translation_interface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ requests
4
+ python-multipart
5
+ sentencepiece
6
+ nltk
utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Maps the language name shown in the UI to the language code passed to the
# mBART tokenizer. Insertion order is significant: app.py builds its dropdown
# from list(lang_ids.keys()), so entries must stay in this order.
lang_ids = {
    "Arabic": "ar_AR",
    "Czech": "cs_CZ",
    "German": "de_DE",
    "English": "en_XX",
    "Spanish": "es_XX",
    "Estonian": "et_EE",
    "Finnish": "fi_FI",
    "French": "fr_XX",
    "Gujarati": "gu_IN",
    "Hindi": "hi_IN",
    "Italian": "it_IT",
    "Japanese": "ja_XX",
    "Kazakh": "kk_KZ",
    "Korean": "ko_KR",
    "Lithuanian": "lt_LT",
    "Latvian": "lv_LV",
    "Burmese": "my_MM",
    "Nepali": "ne_NP",
    "Dutch": "nl_XX",
    "Romanian": "ro_RO",
    "Russian": "ru_RU",
    "Sinhala": "si_LK",
    "Turkish": "tr_TR",
    "Vietnamese": "vi_VN",
    "Chinese": "zh_CN",

    "Afrikaans": "af_ZA",
    "Azerbaijani": "az_AZ",
    "Bengali": "bn_IN",
    "Persian": "fa_IR",
    "Hebrew": "he_IL",
    "Croatian": "hr_HR",
    "Indonesian": "id_ID",
    "Georgian": "ka_GE",
    "Khmer": "km_KH",
    "Macedonian": "mk_MK",
    "Malayalam": "ml_IN",
    "Mongolian": "mn_MN",
    "Marathi": "mr_IN",
    "Polish": "pl_PL",
    "Pashto": "ps_AF",
    "Portuguese": "pt_XX",
    "Swedish": "sv_SE",
    "Swahili": "sw_KE",
    "Tamil": "ta_IN",
    "Telugu": "te_IN",
    "Thai": "th_TH",
    "Tagalog": "tl_XX",
    "Ukrainian": "uk_UA",
    "Urdu": "ur_PK",
    "Xhosa": "xh_ZA",
    "Galician": "gl_ES",
    "Slovene": "sl_SI",
}
55
+