Spaces:
Running
on
Zero
Running
on
Zero
alibabasglab
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -46,7 +46,7 @@ def find_mp4_files(directory):
|
|
46 |
for root, dirs, files in os.walk(directory):
|
47 |
for file in files:
|
48 |
# Check if the file ends with .mp4
|
49 |
-
if file.endswith(".
|
50 |
mp4_files.append(os.path.join(root, file))
|
51 |
|
52 |
return mp4_files
|
@@ -61,7 +61,7 @@ def fn_clearvoice_tse(input_video):
|
|
61 |
output_list = find_mp4_files('path_to_output_videos_tse/')
|
62 |
print(output_list)
|
63 |
|
64 |
-
return output_list
|
65 |
|
66 |
demo = gr.Blocks()
|
67 |
|
@@ -117,13 +117,12 @@ tse_demo = gr.Interface(
|
|
117 |
gr.Video(label="Input Video"),
|
118 |
],
|
119 |
outputs = [
|
120 |
-
gr.
|
121 |
-
gr.Audio(label="Output Audio", type="filepath"),
|
122 |
],
|
123 |
-
title = "ClearVoice:
|
124 |
-
description = ("Gradio demo for
|
125 |
-
"We provide the generalized models trained on
|
126 |
-
"To test it, simply upload your
|
127 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
128 |
"<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
129 |
examples = [
|
|
|
46 |
for root, dirs, files in os.walk(directory):
|
47 |
for file in files:
|
48 |
# Check if the file ends with .mp4
|
49 |
+
if file.endswith(".mp4") and file[:3] == 'est':
|
50 |
mp4_files.append(os.path.join(root, file))
|
51 |
|
52 |
return mp4_files
|
|
|
61 |
output_list = find_mp4_files('path_to_output_videos_tse/')
|
62 |
print(output_list)
|
63 |
|
64 |
+
return output_list
|
65 |
|
66 |
demo = gr.Blocks()
|
67 |
|
|
|
117 |
gr.Video(label="Input Video"),
|
118 |
],
|
119 |
outputs = [
|
120 |
+
gr.Video(label="Output Video List", type="filepath", multiple=True)
|
|
|
121 |
],
|
122 |
+
title = "ClearVoice: Audio-visual speaker extraction",
|
123 |
+
description = ("Gradio demo for audio-visual speaker extraction with ClearVoice. The model (AV_MossFormer2_TSE_16K) supports 16 kHz sampling rate. "
|
124 |
+
"We provide the generalized models trained on mid-scale of data for handling independent speakers and various of background environments. "
|
125 |
+
"To test it, simply upload your video, or click one of the examples to load them. Read more at the links below."),
|
126 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
127 |
"<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
128 |
examples = [
|