alibabasglab committed on
Commit
e63a812
·
verified ·
1 Parent(s): 773948d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -7
app.py CHANGED
@@ -3,15 +3,20 @@ import soundfile as sf
3
  import gradio as gr
4
  from clearvoice import ClearVoice
5
 
6
- def fn_clearvoice_se(input_wav):
7
- myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])
 
 
 
 
 
8
  output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)
9
  if isinstance(output_wav_dict, dict):
10
  key = next(iter(output_wav_dict))
11
  output_wav = output_wav_dict[key]
12
  else:
13
  output_wav = output_wav_dict
14
- sf.write('enhanced.wav', output_wav, 16000)
15
  return 'enhanced.wav'
16
 
17
  def fn_clearvoice_ss(input_wav):
@@ -36,17 +41,22 @@ se_demo = gr.Interface(
36
  fn=fn_clearvoice_se,
37
  inputs = [
38
  gr.Audio(label="Input Audio", type="filepath"),
 
 
 
39
  ],
40
  outputs = [
41
  gr.Audio(label="Output Audio", type="filepath"),
42
  ],
43
  title = "ClearVoice: Speech Enhancement",
44
- description = ("Gradio demo for Speech enhancement with ClearVoice. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
 
 
45
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> | <a href='https://github.com/alibabasglab/FRCRN' target='_blank'>Github Repo</a></p>"
46
  ),
47
  examples = [
48
- ['examples/mandarin_speech_16kHz.wav'],
49
- ['examples/english_speech_48kHz.wav'],
50
  ],
51
  cache_examples = True,
52
  )
@@ -61,7 +71,9 @@ ss_demo = gr.Interface(
61
  gr.Audio(label="Output Audio", type="filepath"),
62
  ],
63
  title = "ClearVoice: Speech Separation",
64
- description = ("Gradio demo for Speech enhancement with ClearVoice. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
 
 
65
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
66
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
67
  examples = [
 
3
  import gradio as gr
4
  from clearvoice import ClearVoice
5
 
6
def fn_clearvoice_se(input_wav, sr):
    """Run ClearVoice speech enhancement and return the path of the result.

    Args:
        input_wav: Path of the audio file to enhance; forwarded to ClearVoice
            as ``input_path``.
        sr: Sampling-rate choice coming from the UI. ``'16 kHz'`` selects the
            ``FRCRN_SE_16K`` model (output written at 16000 Hz); any other
            value selects ``MossFormer2_SE_48K`` (output written at 48000 Hz).
            A list/tuple is also accepted, in which case its first element is
            used.

    Returns:
        The string ``'enhanced.wav'``, i.e. the file the enhanced audio was
        written to.
    """
    # The value may arrive as a plain string (the examples pass "16 kHz") or
    # wrapped in a list (the dropdown default is declared as ["16 kHz"]).
    # Indexing a string with [0] yields only its first character, which never
    # equals '16 kHz', so normalise to a scalar before comparing.
    if isinstance(sr, (list, tuple)):
        sr = sr[0] if sr else None

    if sr == '16 kHz':
        model_name, fs = 'FRCRN_SE_16K', 16000
    else:
        model_name, fs = 'MossFormer2_SE_48K', 48000

    myClearVoice = ClearVoice(task='speech_enhancement', model_names=[model_name])
    output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)

    # The result is either a dict (the first entry is used) or the audio
    # data itself.
    if isinstance(output_wav_dict, dict):
        key = next(iter(output_wav_dict))
        output_wav = output_wav_dict[key]
    else:
        output_wav = output_wav_dict

    # NOTE(review): the output path is fixed, so every call overwrites the
    # same file -- confirm this is acceptable if requests can overlap.
    sf.write('enhanced.wav', output_wav, fs)
    return 'enhanced.wav'
21
 
22
  def fn_clearvoice_ss(input_wav):
 
41
  fn=fn_clearvoice_se,
42
  inputs = [
43
  gr.Audio(label="Input Audio", type="filepath"),
44
+ gr.Dropdown(
45
+ ["16 kHz", "48 kHz"], value=["16 kHz"], multiselect=False, label="Sampling Rate", info="Choose the sampling rate for your output."
46
+ ),
47
  ],
48
  outputs = [
49
  gr.Audio(label="Output Audio", type="filepath"),
50
  ],
51
  title = "ClearVoice: Speech Enhancement",
52
+ description = ("Gradio demo for Speech enhancement with ClearVoice. The models support audios with 16 kHz (FRCRN backbone) and 48 kHz (MossFormer2 backbone) sampling rates. "
53
+ "We provide the generalized models trained on large scale of data for handling various of background environments. "
54
+ "To test it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
55
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> | <a href='https://github.com/alibabasglab/FRCRN' target='_blank'>Github Repo</a></p>"
56
  ),
57
  examples = [
58
+ ["examples/mandarin_speech_16kHz.wav", "16 kHz"],
59
+ ["examples/english_speech_48kHz.wav", "48 kHz"],
60
  ],
61
  cache_examples = True,
62
  )
 
71
  gr.Audio(label="Output Audio", type="filepath"),
72
  ],
73
  title = "ClearVoice: Speech Separation",
74
+ description = ("Gradio demo for Speech separation with ClearVoice. The model (MossFormer2 backbone) supports 2 speakers' audio mixtures with 16 kHz sampling rate. "
75
+ "We provide the generalized models trained on large scale of data for handling independent speakers and various of background environments. "
76
+ "To test it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
77
  article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
78
  "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
79
  examples = [