Files changed (2) hide show
  1. app.py +19 -5
  2. assets/sample_input_2.mp3 +3 -0
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
 
 
3
  import gradio as gr
4
  import numpy as np
5
  import torch
@@ -48,12 +49,12 @@ translator = Translator(
48
  def predict(
49
  task_name: str,
50
  audio_source: str,
51
- input_audio_mic: str | None,
52
- input_audio_file: str | None,
53
- input_text: str | None,
54
- source_language: str | None,
55
  target_language: str,
56
- ) -> tuple[tuple[int, np.ndarray] | None, str]:
57
  task_name = task_name.split()[0]
58
  source_language_code = LANGUAGE_NAME_TO_CODE.get(source_language, None)
59
  target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
@@ -290,6 +291,8 @@ with gr.Blocks(css="style.css") as demo:
290
  examples=[
291
  ["assets/sample_input.mp3", "French"],
292
  ["assets/sample_input.mp3", "Mandarin Chinese"],
 
 
293
  ],
294
  inputs=[input_audio_file, target_language],
295
  outputs=[output_audio, output_text],
@@ -301,6 +304,8 @@ with gr.Blocks(css="style.css") as demo:
301
  examples=[
302
  ["assets/sample_input.mp3", "French"],
303
  ["assets/sample_input.mp3", "Mandarin Chinese"],
 
 
304
  ],
305
  inputs=[input_audio_file, target_language],
306
  outputs=[output_audio, output_text],
@@ -312,6 +317,10 @@ with gr.Blocks(css="style.css") as demo:
312
  examples=[
313
  ["My favorite animal is the elephant.", "English", "French"],
314
  ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
 
 
 
 
315
  ],
316
  inputs=[input_text, source_language, target_language],
317
  outputs=[output_audio, output_text],
@@ -323,6 +332,10 @@ with gr.Blocks(css="style.css") as demo:
323
  examples=[
324
  ["My favorite animal is the elephant.", "English", "French"],
325
  ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
 
 
 
 
326
  ],
327
  inputs=[input_text, source_language, target_language],
328
  outputs=[output_audio, output_text],
@@ -333,6 +346,7 @@ with gr.Blocks(css="style.css") as demo:
333
  asr_examples = gr.Examples(
334
  examples=[
335
  ["assets/sample_input.mp3", "English"],
 
336
  ],
337
  inputs=[input_audio_file, target_language],
338
  outputs=[output_audio, output_text],
 
1
  import os
2
 
3
+ from typing import Union
4
  import gradio as gr
5
  import numpy as np
6
  import torch
 
49
  def predict(
50
  task_name: str,
51
  audio_source: str,
52
+ input_audio_mic: Union[str, None],
53
+ input_audio_file: Union[str, None],
54
+ input_text: Union[str, None],
55
+ source_language: Union[str, None],
56
  target_language: str,
57
+ ) -> tuple[Union[tuple[int, np.ndarray], None], str]:
58
  task_name = task_name.split()[0]
59
  source_language_code = LANGUAGE_NAME_TO_CODE.get(source_language, None)
60
  target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
 
291
  examples=[
292
  ["assets/sample_input.mp3", "French"],
293
  ["assets/sample_input.mp3", "Mandarin Chinese"],
294
+ ["assets/sample_input_2.mp3", "Hindi"],
295
+ ["assets/sample_input_2.mp3", "Spanish"],
296
  ],
297
  inputs=[input_audio_file, target_language],
298
  outputs=[output_audio, output_text],
 
304
  examples=[
305
  ["assets/sample_input.mp3", "French"],
306
  ["assets/sample_input.mp3", "Mandarin Chinese"],
307
+ ["assets/sample_input_2.mp3", "Hindi"],
308
+ ["assets/sample_input_2.mp3", "Spanish"],
309
  ],
310
  inputs=[input_audio_file, target_language],
311
  outputs=[output_audio, output_text],
 
317
  examples=[
318
  ["My favorite animal is the elephant.", "English", "French"],
319
  ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
320
+ ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
321
+ "English", "Hindi"],
322
+ ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
323
+ "English", "Spanish"],
324
  ],
325
  inputs=[input_text, source_language, target_language],
326
  outputs=[output_audio, output_text],
 
332
  examples=[
333
  ["My favorite animal is the elephant.", "English", "French"],
334
  ["My favorite animal is the elephant.", "English", "Mandarin Chinese"],
335
+ ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
336
+ "English", "Hindi"],
337
+ ["Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
338
+ "English", "Spanish"],
339
  ],
340
  inputs=[input_text, source_language, target_language],
341
  outputs=[output_audio, output_text],
 
346
  asr_examples = gr.Examples(
347
  examples=[
348
  ["assets/sample_input.mp3", "English"],
349
+ ["assets/sample_input_2.mp3", "English"],
350
  ],
351
  inputs=[input_audio_file, target_language],
352
  outputs=[output_audio, output_text],
assets/sample_input_2.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a505a4641e3f5f0ddec9508832793aa20e63d2545530b66bc04a9bd19a742e6
3
+ size 30624