Spaces:
Sleeping
Sleeping
juliuserictuliao
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,9 @@ def correct_casing(input_sentence):
|
|
18 |
return ' '.join([s.replace(s[0], s[0].capitalize(), 1) for s in sentences])
|
19 |
|
20 |
def asr_transcript(audio):
|
|
|
|
|
|
|
21 |
# Process the audio
|
22 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
23 |
# Get logits
|
@@ -31,15 +34,18 @@ def asr_transcript(audio):
|
|
31 |
return transcription
|
32 |
|
33 |
def real_time_asr(audio, state=""):
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
38 |
|
39 |
# Create the Gradio interface
|
40 |
iface = gr.Interface(
|
41 |
fn=real_time_asr,
|
42 |
-
inputs=[gr.Audio(streaming=True), gr.State()],
|
43 |
outputs=[gr.Textbox(), gr.State()],
|
44 |
live=True,
|
45 |
title="Real-Time ASR using Wav2Vec 2.0",
|
|
|
18 |
return ' '.join([s.replace(s[0], s[0].capitalize(), 1) for s in sentences])
|
19 |
|
20 |
def asr_transcript(audio):
|
21 |
+
if audio is None or len(audio) == 0:
|
22 |
+
return ""
|
23 |
+
|
24 |
# Process the audio
|
25 |
input_values = processor(audio, sampling_rate=16000, return_tensors="pt").input_values
|
26 |
# Get logits
|
|
|
34 |
return transcription
|
35 |
|
36 |
def real_time_asr(audio, state=""):
    """Transcribe one streamed audio chunk and append it to the transcript.

    Args:
        audio: Audio for the current chunk. It is converted with
            ``np.array`` and handed to ``asr_transcript``.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript, so the first call works even when the
            caller supplies no initial state.

    Returns:
        A ``(text, state)`` pair. On success both items are the updated
        transcript. On failure ``text`` is the exception message and
        ``state`` is the transcript as it was before this chunk.
    """
    # An uninitialised state arrives as None; `None + str` would raise on
    # every call and the transcript would never start accumulating.
    if state is None:
        state = ""
    try:
        # NOTE(review): a Gradio Audio input may deliver a
        # (sample_rate, samples) tuple rather than bare samples -- confirm
        # what the component passes and unpack/resample here if needed.
        audio = np.array(audio)
        transcription = asr_transcript(audio)
        # Skip empty results so silent chunks do not pile up spaces, and
        # only insert a separator once there is text to separate from.
        if transcription:
            separator = " " if state else ""
            state = state + separator + transcription
        return state, state
    except Exception as e:
        # UI boundary: surface the error in the textbox instead of crashing
        # the stream, and keep the transcript collected so far.
        return str(e), state
|
44 |
|
45 |
# Create the Gradio interface
|
46 |
iface = gr.Interface(
|
47 |
fn=real_time_asr,
|
48 |
+
inputs=[gr.Audio(source="microphone", streaming=True), gr.State()],
|
49 |
outputs=[gr.Textbox(), gr.State()],
|
50 |
live=True,
|
51 |
title="Real-Time ASR using Wav2Vec 2.0",
|