File size: 3,667 Bytes
02c7bdf
bdaf47a
02c7bdf
 
 
b78b7d0
 
a9e592e
132a2a9
 
 
 
 
f821359
02c7bdf
3192961
b78b7d0
 
 
 
 
4554491
 
 
b78b7d0
4554491
 
 
 
 
 
b78b7d0
3192961
 
3956066
b78b7d0
 
e805751
b78b7d0
 
e805751
b78b7d0
 
 
6963e61
 
b78b7d0
6320c59
 
b78b7d0
 
 
 
 
8bb6908
3956066
e805751
3956066
 
e805751
 
3956066
b78b7d0
3956066
6963e61
 
3956066
6320c59
 
d04ae35
3956066
 
b78b7d0
99710ec
6963e61
 
3acf4c8
0f8dddd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import functools
import tempfile

import torch
import soundfile as sf
import gradio as gr
from clearvoice import ClearVoice

@functools.lru_cache(maxsize=None)
def _load_se_model():
    """Return the shared FRCRN_SE_16K speech-enhancement model.

    The model is constructed on the first call and the same instance is
    handed back afterwards, so it is not rebuilt for every request.
    """
    # NOTE(review): assumes a ClearVoice instance may be called repeatedly
    # with different inputs -- confirm against the ClearVoice documentation.
    return ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])


def fn_clearvoice_se(input_wav):
    """Enhance one audio file and return the path of the written result.

    Args:
        input_wav: Path of the audio file to enhance.

    Returns:
        Path of a newly created WAV file, written at 16000 Hz, that holds
        the enhanced audio.

    Raises:
        gr.Error: If no input file was supplied.
    """
    if not input_wav:
        # Surface a readable message in the UI instead of failing somewhere
        # inside the model code with an unrelated exception.
        raise gr.Error("Please provide an input audio file.")

    result = _load_se_model()(input_path=input_wav, online_write=False)

    # The call may hand back a dict; in that case use the value stored under
    # its first key. Any other result is written out as-is.
    enhanced = next(iter(result.values())) if isinstance(result, dict) else result

    # Reserve a unique file per request so overlapping requests cannot
    # overwrite each other's output. The handle is closed before writing so
    # the path can be reopened on every platform.
    with tempfile.NamedTemporaryFile(
        prefix="enhanced_", suffix=".wav", delete=False
    ) as handle:
        enhanced_path = handle.name
    sf.write(enhanced_path, enhanced, 16000)
    return enhanced_path

@functools.lru_cache(maxsize=None)
def _load_ss_model():
    """Return the shared MossFormer2_SS_16K speech-separation model.

    The model is constructed on the first call and the same instance is
    handed back afterwards, so it is not rebuilt for every request.
    """
    # NOTE(review): assumes a ClearVoice instance may be called repeatedly
    # with different inputs -- confirm against the ClearVoice documentation.
    return ClearVoice(task='speech_separation', model_names=['MossFormer2_SS_16K'])


def fn_clearvoice_ss(input_wav):
    """Separate one audio file into two streams and return both file paths.

    Args:
        input_wav: Path of the audio file to separate.

    Returns:
        A 2-tuple of paths to newly created WAV files, written at 16000 Hz,
        holding the first and second separated stream respectively.

    Raises:
        gr.Error: If no input file was supplied.
    """
    if not input_wav:
        # Surface a readable message in the UI instead of failing somewhere
        # inside the model code with an unrelated exception.
        raise gr.Error("Please provide an input audio file.")

    result = _load_ss_model()(input_path=input_wav, online_write=False)

    # The call may hand back a dict; in that case use the value stored under
    # its first key. Either way the streams are then taken by index.
    streams = next(iter(result.values())) if isinstance(result, dict) else result

    # Pull out both streams before touching the disk so an unexpected result
    # shape fails without leaving a half-written pair of files behind.
    separated = (streams[0], streams[1])

    saved_paths = []
    for number, stream in enumerate(separated, start=1):
        # Reserve a unique file per request and stream so overlapping
        # requests cannot overwrite each other's output.
        with tempfile.NamedTemporaryFile(
            prefix=f"separated_s{number}_", suffix=".wav", delete=False
        ) as handle:
            stream_path = handle.name
        sf.write(stream_path, stream, 16000)
        saved_paths.append(stream_path)
    return tuple(saved_paths)

# Top-level container; the tabbed interface is rendered inside it further down.
demo = gr.Blocks()

# Speech-enhancement tab: one uploaded file in, one enhanced file out.
se_demo = gr.Interface(
    fn=fn_clearvoice_se,
    inputs=[gr.Audio(label="Input Audio", type="filepath")],
    outputs=[gr.Audio(label="Output Audio", type="filepath")],
    title="ClearVoice: Speech Enhancement",
    description=(
        "Gradio demo for Speech enhancement with ClearVoice. "
        "To use it, simply upload your audio, or click one of the examples to load them. "
        "Read more at the links below."
    ),
    # Footer with links to the FRCRN paper and repository.
    article=(
        "<p style='text-align: center'>"
        "<a href='https://arxiv.org/abs/2206.07293' target='_blank'>"
        "FRCRN: Boosting Feature Representation Using Frequency Recurrence "
        "for Monaural Speech Enhancement</a>"
        " | "
        "<a href='https://github.com/alibabasglab/FRCRN' target='_blank'>Github Repo</a>"
        "</p>"
    ),
    examples=[
        ["examples/mandarin_speech_16kHz.wav"],
        ["examples/english_speech_48kHz.wav"],
    ],
    cache_examples=True,
)

# Speech-separation tab: one uploaded file in, two separated files out.
ss_demo = gr.Interface(
    fn=fn_clearvoice_ss,
    inputs=[
        gr.Audio(label="Input Audio", type="filepath"),
    ],
    outputs=[
        gr.Audio(label="Output Audio", type="filepath"),
        gr.Audio(label="Output Audio", type="filepath"),
    ],
    title="ClearVoice: Speech Separation",
    # This tab performs separation; the text previously said "enhancement"
    # because it was copied from the enhancement tab.
    description=("Gradio demo for Speech separation with ClearVoice. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
    # Footer with links to the MossFormer and MossFormer2 papers and repositories.
    article=("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
              "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
    examples=[
        ['examples/female_female_speech.wav'],
        ['examples/female_male_speech.wav'],
    ],
    cache_examples=True,
)

# Render both interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        interface_list=[se_demo, ss_demo],
        tab_names=["Speech Enhancement", "Speech Separation"],
    )

# Start serving the assembled app.
demo.launch()