SheldonYC committed on
Commit
78d4870
·
1 Parent(s): 5a17bb9

update asr model

Browse files
Files changed (3) hide show
  1. Samples/Sample_audios/test.wav +0 -0
  2. app.py +7 -2
  3. requirements.txt +1 -0
Samples/Sample_audios/test.wav ADDED
Binary file (238 kB). View file
 
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from transformers import pipeline
2
  import numpy as np
3
  import gradio as gr
@@ -14,8 +15,10 @@ def transcribe(audio):
14
  result = asr_model({"sampling_rate": sr, "raw": y})["text"]
15
  return result
16
 
17
- asr_model_id = "openai/whisper-small.en"
18
- asr_model = pipeline("automatic-speech-recognition", model=asr_model_id)
 
 
19
 
20
  with gr.Blocks() as demo:
21
  with gr.Column():
@@ -23,6 +26,8 @@ with gr.Blocks() as demo:
23
  """
24
  # HKU Canteen VA
25
  """)
 
 
26
  va = gr.Chatbot(container=False)
27
 
28
  with gr.Row(): # text input
 
1
+ import nemo.collections.asr as nemo_asr
2
  from transformers import pipeline
3
  import numpy as np
4
  import gradio as gr
 
15
  result = asr_model({"sampling_rate": sr, "raw": y})["text"]
16
  return result
17
 
18
+ # asr_model_id = "openai/whisper-small.en"
19
+ # asr_model = pipeline("automatic-speech-recognition", model=asr_model_id)
20
+ asr_model = nemo_asr.models.EncDecCTCBPEModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
21
+ text = asr_model.transcribe(["./Samples/Sample_audios/test.wav"])
22
 
23
  with gr.Blocks() as demo:
24
  with gr.Column():
 
26
  """
27
  # HKU Canteen VA
28
  """)
29
+ gr.Markdown(
30
+ f"{text}")
31
  va = gr.Chatbot(container=False)
32
 
33
  with gr.Row(): # text input
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  torch
2
  transformers
 
3
  numpy
 
1
  torch
2
  transformers
3
+ nemo_toolkit['all']
4
  numpy