Spaces:
Running
on
Zero
Running
on
Zero
alibabasglab
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -51,7 +51,7 @@ def find_mp4_files(directory):
|
|
51 |
|
52 |
return mp4_files
|
53 |
|
54 |
-
|
55 |
def fn_clearvoice_tse(input_video):
|
56 |
myClearVoice = ClearVoice(task='target_speaker_extraction', model_names=['AV_MossFormer2_TSE_16K'])
|
57 |
#output_wav_dict =
|
@@ -119,11 +119,10 @@ tse_demo = gr.Interface(
|
|
119 |
gr.Gallery(label="Output Video List")
|
120 |
],
|
121 |
title = "ClearVoice: Audio-visual speaker extraction",
|
122 |
-
description = ("Gradio demo for audio-visual speaker extraction with ClearVoice.
|
123 |
-
"We provide the generalized models trained on mid-scale of data for handling independent speakers and various of background environments. "
|
124 |
"To test it, simply upload your video, or click one of the examples to load them. Read more at the links below."),
|
125 |
-
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
126 |
-
|
127 |
examples = [
|
128 |
['examples/001.mp4'],
|
129 |
['examples/002.mp4'],
|
@@ -133,6 +132,6 @@ tse_demo = gr.Interface(
|
|
133 |
|
134 |
with demo:
|
135 |
#gr.TabbedInterface([se_demo], ["Speech Enhancement"])
|
136 |
-
gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Speech Enhancement", "Speech Separation", "
|
137 |
|
138 |
demo.launch()
|
|
|
51 |
|
52 |
return mp4_files
|
53 |
|
54 |
+
|
55 |
def fn_clearvoice_tse(input_video):
|
56 |
myClearVoice = ClearVoice(task='target_speaker_extraction', model_names=['AV_MossFormer2_TSE_16K'])
|
57 |
#output_wav_dict =
|
|
|
119 |
gr.Gallery(label="Output Video List")
|
120 |
],
|
121 |
title = "ClearVoice: Audio-visual speaker extraction",
|
122 |
+
description = ("Gradio demo for audio-visual speaker extraction with ClearVoice."
|
|
|
123 |
"To test it, simply upload your video, or click one of the examples to load them. Read more at the links below."),
|
124 |
+
# article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
125 |
+
# "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
126 |
examples = [
|
127 |
['examples/001.mp4'],
|
128 |
['examples/002.mp4'],
|
|
|
132 |
|
133 |
with demo:
|
134 |
#gr.TabbedInterface([se_demo], ["Speech Enhancement"])
|
135 |
+
gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Speech Enhancement", "Speech Separation", "Audio-visual Speaker Extraction"])
|
136 |
|
137 |
demo.launch()
|