"""Gradio demo exposing ClearVoice speech enhancement and speech separation."""

import functools

import torch
import soundfile as sf
import gradio as gr
from clearvoice import ClearVoice

# Sample rate passed to soundfile when writing every output file.
OUTPUT_SAMPLE_RATE = 16000

# Output paths are fixed and relative to the working directory, so each
# request overwrites the files produced by the previous one.
ENHANCED_WAV = 'enhanced.wav'
SEPARATED_WAVS = ("separated_s1.wav", "separated_s2.wav")


@functools.lru_cache(maxsize=None)
def _load_model(task, model_name):
    """Return the ClearVoice instance for (task, model_name), building it once.

    The instance is created lazily on first use and then reused, instead of
    constructing a new ClearVoice object for every request.
    """
    return ClearVoice(task=task, model_names=[model_name])


def _unwrap_result(result):
    """Return the first value of *result* if it is a dict, else *result* itself.

    The ClearVoice call is handled as returning either a dict or the data
    directly; for a dict only the first entry is used.
    NOTE(review): the dict is presumably keyed by model name -- confirm.
    """
    if isinstance(result, dict):
        return next(iter(result.values()))
    return result


def fn_clearvoice_se(input_wav):
    """Enhance the audio file at *input_wav* and return the output file path.

    Args:
        input_wav: Path of the audio file to process (Gradio passes a
            filepath because the input component uses type="filepath").

    Returns:
        The path 'enhanced.wav', written at OUTPUT_SAMPLE_RATE.
    """
    model = _load_model('speech_enhancement', 'FRCRN_SE_16K')
    enhanced = _unwrap_result(model(input_path=input_wav, online_write=False))
    sf.write(ENHANCED_WAV, enhanced, OUTPUT_SAMPLE_RATE)
    return ENHANCED_WAV


def fn_clearvoice_ss(input_wav):
    """Separate the audio file at *input_wav* into two output files.

    Args:
        input_wav: Path of the audio file to process.

    Returns:
        A tuple ("separated_s1.wav", "separated_s2.wav") with the paths of
        the first and second separated signals, each written at
        OUTPUT_SAMPLE_RATE.

    Raises:
        IndexError: If the model output holds fewer than two signals.
    """
    model = _load_model('speech_separation', 'MossFormer2_SS_16K')
    sources = _unwrap_result(model(input_path=input_wav, online_write=False))
    # Only the first two entries of the model output are written out.
    for path, source in zip(SEPARATED_WAVS, (sources[0], sources[1])):
        sf.write(path, source, OUTPUT_SAMPLE_RATE)
    return SEPARATED_WAVS


demo = gr.Blocks()

se_demo = gr.Interface(
    fn=fn_clearvoice_se,
    inputs=[
        gr.Audio(label="Input Audio", type="filepath"),
    ],
    outputs=[
        gr.Audio(label="Output Audio", type="filepath"),
    ],
    title="ClearVoice: Speech Enhancement",
    description=(
        "Gradio demo for Speech enhancement with ClearVoice. To use it, "
        "simply upload your audio, or click one of the examples to load "
        "them. Read more at the links below."
    ),
    # NOTE(review): this text looks like it once carried hyperlink markup
    # for the paper and the GitHub repo -- TODO restore the links.
    article=(
        "\n"
        "FRCRN: Boosting Feature Representation Using Frequency Recurrence "
        "for Monaural Speech Enhancement | Github Repo\n"
    ),
    examples=[
        ['examples/mandarin_speech_16kHz.wav'],
        ['examples/english_speech_48kHz.wav'],
    ],
    cache_examples=True,
)

ss_demo = gr.Interface(
    fn=fn_clearvoice_ss,
    inputs=[
        gr.Audio(label="Input Audio", type="filepath"),
    ],
    outputs=[
        gr.Audio(label="Output Audio", type="filepath"),
        gr.Audio(label="Output Audio", type="filepath"),
    ],
    title="ClearVoice: Speech Separation",
    description=(
        "Gradio demo for Speech separation with ClearVoice. To use it, "
        "simply upload your audio, or click one of the examples to load "
        "them. Read more at the links below."
    ),
    # NOTE(review): as above, the hyperlink markup appears to be missing --
    # TODO restore the links.
    article=(
        "MossFormer: Pushing the Performance Limit of Monaural Speech "
        "Separation using Gated Single-Head Transformer with "
        "Convolution-Augmented Joint Self-Attentions | Github Repo\n"
        "MossFormer2: Combining Transformer and RNN-Free Recurrent Network "
        "for Enhanced Time-Domain Monaural Speech Separation | Github Repo\n"
    ),
    examples=[
        ['examples/female_female_speech.wav'],
        ['examples/female_male_speech.wav'],
    ],
    cache_examples=True,
)

with demo:
    gr.TabbedInterface(
        [se_demo, ss_demo],
        ["Speech Enhancement", "Speech Separation"],
    )

demo.launch()