Spaces:
Runtime error
Runtime error
update asr model
Browse files- app.py +10 -10
- requirements.txt +3 -1
app.py
CHANGED
@@ -8,6 +8,8 @@ import nemo.collections.asr as nemo_asr
|
|
8 |
from transformers import pipeline
|
9 |
import numpy as np
|
10 |
import gradio as gr
|
|
|
|
|
11 |
|
12 |
def respond(message, chat_history):
|
13 |
bot_message = message
|
@@ -16,15 +18,13 @@ def respond(message, chat_history):
|
|
16 |
|
17 |
def transcribe(audio):
|
18 |
sr, y = audio
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
22 |
return result
|
23 |
|
24 |
-
# asr_model_id = "openai/whisper-small.en"
|
25 |
-
# asr_model = pipeline("automatic-speech-recognition", model=asr_model_id)
|
26 |
asr_model = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
|
27 |
-
text = asr_model.transcribe(["./Samples/Sample_audios/test.wav"])
|
28 |
|
29 |
with gr.Blocks() as demo:
|
30 |
with gr.Column():
|
@@ -32,22 +32,22 @@ with gr.Blocks() as demo:
|
|
32 |
"""
|
33 |
# HKU Canteen VA
|
34 |
""")
|
35 |
-
gr.Markdown(f"{text}")
|
36 |
va = gr.Chatbot(container=False)
|
37 |
|
38 |
with gr.Row(): # text input
|
39 |
text_input = gr.Textbox(placeholder="Ask me anything...", container=False, scale=1)
|
40 |
submit_btn = gr.Button("Submit", scale=0)
|
41 |
|
42 |
-
|
43 |
-
|
44 |
|
45 |
with gr.Row(): # button toolbar
|
46 |
clear = gr.ClearButton([text_input, va])
|
47 |
|
48 |
text_input.submit(respond, [text_input, va], [text_input, va], queue=False)
|
49 |
submit_btn.click(respond, [text_input, va], [text_input, va], queue=False)
|
50 |
-
# recording.stop_recording(transcribe, [recording], [text_input]).then(respond, [text_input, va], [text_input, va], queue=False)
|
|
|
51 |
|
52 |
if __name__ == "__main__":
|
53 |
demo.launch()
|
|
|
8 |
from transformers import pipeline
|
9 |
import numpy as np
|
10 |
import gradio as gr
|
11 |
+
import librosa
|
12 |
+
from scipy.io.wavfile import write
|
13 |
|
14 |
def respond(message, chat_history):
|
15 |
bot_message = message
|
|
|
18 |
|
19 |
def transcribe(audio):
|
20 |
sr, y = audio
|
21 |
+
audio_name = "resampled_audio.wav"
|
22 |
+
resampled_audio = librosa.resample(y=y.astype("float"), orig_sr=sr, target_sr=16000)
|
23 |
+
write(audio_name, 16000, resampled_audio)
|
24 |
+
result = asr_model.transcribe([f"./{audio_name}"])
|
25 |
return result
|
26 |
|
|
|
|
|
27 |
asr_model = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="nvidia/parakeet-ctc-0.6b")
|
|
|
28 |
|
29 |
with gr.Blocks() as demo:
|
30 |
with gr.Column():
|
|
|
32 |
"""
|
33 |
# HKU Canteen VA
|
34 |
""")
|
|
|
35 |
va = gr.Chatbot(container=False)
|
36 |
|
37 |
with gr.Row(): # text input
|
38 |
text_input = gr.Textbox(placeholder="Ask me anything...", container=False, scale=1)
|
39 |
submit_btn = gr.Button("Submit", scale=0)
|
40 |
|
41 |
+
with gr.Row(): # audio input
|
42 |
+
recording = gr.Microphone(show_download_button=False, container=False)
|
43 |
|
44 |
with gr.Row(): # button toolbar
|
45 |
clear = gr.ClearButton([text_input, va])
|
46 |
|
47 |
text_input.submit(respond, [text_input, va], [text_input, va], queue=False)
|
48 |
submit_btn.click(respond, [text_input, va], [text_input, va], queue=False)
|
49 |
+
# recording.stop_recording(transcribe, [recording], [text_input]).then(respond, [text_input, va], [text_input, va], queue=False)
|
50 |
+
recording.stop_recording(transcribe, [recording], [text_input])
|
51 |
|
52 |
if __name__ == "__main__":
|
53 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
torch
|
2 |
transformers
|
3 |
-
numpy
|
|
|
|
|
|
1 |
torch
|
2 |
transformers
|
3 |
+
numpy
|
4 |
+
librosa
|
5 |
+
scipy
|