Spaces:

alibabasglab
/

ClearVoice

Running on Zero

App Files Files Community

alibabasglab committed on Oct 21, 2024

Commit

e63a812

verified ·

1 Parent(s): 773948d

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -7

app.py CHANGED Viewed

@@ -3,15 +3,20 @@ import soundfile as sf
 import gradio as gr
 from clearvoice import ClearVoice
-def fn_clearvoice_se(input_wav):
-    myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])
     output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)
     if isinstance(output_wav_dict, dict):
         key = next(iter(output_wav_dict))
         output_wav = output_wav_dict[key]
     else:
         output_wav = output_wav_dict
-    sf.write('enhanced.wav', output_wav, 16000)
     return 'enhanced.wav'
 def fn_clearvoice_ss(input_wav):
@@ -36,17 +41,22 @@ se_demo = gr.Interface(
     fn=fn_clearvoice_se,
     inputs = [
         gr.Audio(label="Input Audio", type="filepath"),
     ],
     outputs = [
         gr.Audio(label="Output Audio", type="filepath"),
     ],
     title = "ClearVoice: Speech Enhancement",
-    description = ("Gradio demo for Speech enhancement with ClearVoice. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
     article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> | <a href='https://github.com/alibabasglab/FRCRN' target='_blank'>Github Repo</a></p>"
               ),
     examples = [
-        ['examples/mandarin_speech_16kHz.wav'],
-        ['examples/english_speech_48kHz.wav'],
     ],
     cache_examples = True,
 )
@@ -61,7 +71,9 @@ ss_demo = gr.Interface(
         gr.Audio(label="Output Audio", type="filepath"),
     ],
     title = "ClearVoice: Speech Separation",
-    description = ("Gradio demo for Speech enhancement with ClearVoice. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
     article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
               "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
     examples = [

 import gradio as gr
 from clearvoice import ClearVoice
def fn_clearvoice_se(input_wav, sr):
    """Enhance a speech recording and return the path of the written WAV file.

    Args:
        input_wav: Path of the input audio, forwarded to ClearVoice as
            ``input_path``.
        sr: Sampling-rate choice coming from the UI. Accepted either as the
            label itself (``"16 kHz"``) or as a list/tuple whose first item
            is the label. ``"16 kHz"`` selects the ``FRCRN_SE_16K`` model;
            any other value selects ``MossFormer2_SE_48K``.

    Returns:
        The string ``'enhanced.wav'``, i.e. the file the result was written to.
    """
    # The rate can arrive as a plain string (the example rows pass "16 kHz")
    # or wrapped in a list (the dropdown default is ["16 kHz"]). Indexing a
    # string would yield its first character, so only unwrap real sequences.
    if isinstance(sr, (list, tuple)):
        choice = sr[0] if sr else None
    else:
        choice = sr

    if choice == '16 kHz':
        model_name, fs = 'FRCRN_SE_16K', 16000
    else:
        # Anything other than the 16 kHz label falls back to the 48 kHz model.
        model_name, fs = 'MossFormer2_SE_48K', 48000

    myClearVoice = ClearVoice(task='speech_enhancement', model_names=[model_name])
    output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)

    # The call may return a dict; in that case use the value of its first key.
    if isinstance(output_wav_dict, dict):
        key = next(iter(output_wav_dict))
        output_wav = output_wav_dict[key]
    else:
        output_wav = output_wav_dict

    # Write at the rate matching the selected model.
    # NOTE(review): the fixed output name is shared by every call — confirm
    # this is acceptable if requests can overlap.
    sf.write('enhanced.wav', output_wav, fs)
    return 'enhanced.wav'
 def fn_clearvoice_ss(input_wav):
     fn=fn_clearvoice_se,
     inputs = [
         gr.Audio(label="Input Audio", type="filepath"),
+        gr.Dropdown(
+            ["16 kHz", "48 kHz"], value=["16 kHz"], multiselect=False, label="Sampling Rate", info="Choose the sampling rate for your output."
+        ),
     ],
     outputs = [
         gr.Audio(label="Output Audio", type="filepath"),
     ],
     title = "ClearVoice: Speech Enhancement",
+    description = ("Gradio demo for Speech enhancement with ClearVoice. The models support audios with 16 kHz (FRCRN backbone) and 48 kHz (MossFormer2 backbone) sampling rates. "
+                   "We provide the generalized models trained on large scale of data for handling various of background environments. "
+                   "To test it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
     article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> | <a href='https://github.com/alibabasglab/FRCRN' target='_blank'>Github Repo</a></p>"
               ),
     examples = [
+        ["examples/mandarin_speech_16kHz.wav", "16 kHz"],
+        ["examples/english_speech_48kHz.wav", "48 kHz"],
     ],
     cache_examples = True,
 )
         gr.Audio(label="Output Audio", type="filepath"),
     ],
     title = "ClearVoice: Speech Separation",
+    description = ("Gradio demo for Speech separation with ClearVoice. The model (MossFormer2 backbone) supports 2 speakers' audio mixtures with 16 kHz sampling rate. "
+                   "We provide the generalized models trained on large scale of data for handling independent speakers and various of background environments. "
+                    "To test it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
     article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
               "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
     examples = [