Respair committed on
Commit
d41c3ed
·
verified ·
1 Parent(s): 45f825b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -11
app.py CHANGED
@@ -39,7 +39,7 @@ theme = gr.themes.Base(
39
 
40
  from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule
41
 
42
- voicelist = ['VO_JA_Kamisato_Ayaka_About_Kujou_Sara','hontonokimochi','gaen_original']
43
  voices = {}
44
  # import phonemizer
45
  # global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
@@ -73,7 +73,7 @@ def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
73
  audios = []
74
  for t in progress.tqdm(texts):
75
  print(t)
76
- audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=lngsteps, embedding_scale=1))
77
  return (24000, np.concatenate(audios))
78
  # def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
79
  # if password == os.environ['ACCESS_CODE']:
@@ -165,19 +165,35 @@ def ljsynthesize(text, steps,embscale, progress=gr.Progress()):
165
  return (24000, np.concatenate(audios))
166
 
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  with gr.Blocks() as vctk:
169
  with gr.Row():
170
  with gr.Column(scale=1):
171
- clinp = gr.Textbox(label="Text", info="Enter the text | ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅใ‚Œใฆใใ ใ•ใ„ใ€็Ÿญใ™ใŽใ‚‹ใจใฒใฉใใชใ‚Šใพใ™",value="ใ‚ใชใŸใŒใ„ใชใ„ใจใ€ไธ–็•Œใฏ่‰ฒ่คชใ›ใฆ่ฆ‹ใˆใพใ™ใ€‚ใ‚ใชใŸใฎ็ฌ‘้ก”ใŒ็งใฎๆ—ฅใ€…ใ‚’ๆ˜Žใ‚‹ใ็…งใ‚‰ใ—ใฆใ„ใพใ™ใ€‚ใ‚ใชใŸใŒใ„ใชใ„ๆ—ฅใฏใ€ใพใ‚‹ใงๅ†ฌใฎใ‚ˆใ†ใซๅฏ’ใใ€ๆš—ใ„ใงใ™.", interactive=True)
172
- voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", interactive=True)
173
- vcsteps = gr.Slider(minimum=3, maximum=20, value=5, step=1, label="Diffusion Steps", info="You'll get more variation in the results if you increase it, doesn't necessarily improve anything.| ใ“ใ‚Œใ‚’ไธŠใ’ใŸใ‚‰ใ‚‚ใฃใจใ‚จใƒขใƒผใ‚ทใƒงใƒŠใƒซใช้Ÿณๅฃฐใซใชใ‚Šใพใ™๏ผˆไธ‹ใ’ใŸใ‚‰ใใฎ้€†๏ผ‰ใ€ๅข—ใ‚„ใ—ใ™ใŽใ‚‹ใจใ ใ‚ใซใชใ‚‹ใฎใงใ€ใ”ๆณจๆ„ใใ ใ•ใ„", interactive=True)
174
- embscale = gr.Slider(minimum=1, maximum=10, value=1.8, step=0.1, label="Embedding Scale (READ WARNING BELOW)", info="ใ“ใ‚Œใ‚’ไธŠใ’ใŸใ‚‰ใ‚‚ใฃใจใ‚จใƒขใƒผใ‚ทใƒงใƒŠใƒซใช้Ÿณๅฃฐใซใชใ‚Šใพใ™๏ผˆไธ‹ใ’ใŸใ‚‰ใใฎ้€†๏ผ‰ใ€ๅข—ใ‚„ใ—ใ™ใŽใ‚‹ใจใ ใ‚ใซใชใ‚‹ใฎใงใ€ใ”ๆณจๆ„ใใ ใ•ใ„", interactive=True)
175
- alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", interactive=True)
176
- beta = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Beta", interactive=True)
177
  with gr.Column(scale=1):
178
- clbtn = gr.Button("Synthesize", variant="primary")
179
- claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
180
- clbtn.click(clsynthesize, inputs=[clinp, voice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
 
 
 
 
 
181
  # with gr.Blocks() as clone:
182
  # with gr.Row():
183
  # with gr.Column(scale=1):
 
39
 
40
  from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule
41
 
42
+ voicelist = ['1','2','3']
43
  voices = {}
44
  # import phonemizer
45
  # global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
 
73
  audios = []
74
  for t in progress.tqdm(texts):
75
  print(t)
76
+ audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.4, diffusion_steps=lngsteps, embedding_scale=1.5))
77
  return (24000, np.concatenate(audios))
78
  # def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
79
  # if password == os.environ['ACCESS_CODE']:
 
165
  return (24000, np.concatenate(audios))
166
 
167
 
168
+ # with gr.Blocks() as vctk:
169
+ # with gr.Row():
170
+ # with gr.Column(scale=1):
171
+ # clinp = gr.Textbox(label="Text", info="Enter the text | ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅใ‚Œใฆใใ ใ•ใ„ใ€็Ÿญใ™ใŽใ‚‹ใจใฒใฉใใชใ‚Šใพใ™",value="ใ‚ใชใŸใŒใ„ใชใ„ใจใ€ไธ–็•Œใฏ่‰ฒ่คชใ›ใฆ่ฆ‹ใˆใพใ™ใ€‚ใ‚ใชใŸใฎ็ฌ‘้ก”ใŒ็งใฎๆ—ฅใ€…ใ‚’ๆ˜Žใ‚‹ใ็…งใ‚‰ใ—ใฆใ„ใพใ™ใ€‚ใ‚ใชใŸใŒใ„ใชใ„ๆ—ฅใฏใ€ใพใ‚‹ใงๅ†ฌใฎใ‚ˆใ†ใซๅฏ’ใใ€ๆš—ใ„ใงใ™.", interactive=True)
172
+ # voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", interactive=True)
173
+ # vcsteps = gr.Slider(minimum=3, maximum=20, value=5, step=1, label="Diffusion Steps", info="You'll get more variation in the results if you increase it, doesn't necessarily improve anything.| ใ“ใ‚Œใ‚’ไธŠใ’ใŸใ‚‰ใ‚‚ใฃใจใ‚จใƒขใƒผใ‚ทใƒงใƒŠใƒซใช้Ÿณๅฃฐใซใชใ‚Šใพใ™๏ผˆไธ‹ใ’ใŸใ‚‰ใใฎ้€†๏ผ‰ใ€ๅข—ใ‚„ใ—ใ™ใŽใ‚‹ใจใ ใ‚ใซใชใ‚‹ใฎใงใ€ใ”ๆณจๆ„ใใ ใ•ใ„", interactive=True)
174
+ # embscale = gr.Slider(minimum=1, maximum=10, value=1.8, step=0.1, label="Embedding Scale (READ WARNING BELOW)", info="ใ“ใ‚Œใ‚’ไธŠใ’ใŸใ‚‰ใ‚‚ใฃใจใ‚จใƒขใƒผใ‚ทใƒงใƒŠใƒซใช้Ÿณๅฃฐใซใชใ‚Šใพใ™๏ผˆไธ‹ใ’ใŸใ‚‰ใใฎ้€†๏ผ‰ใ€ๅข—ใ‚„ใ—ใ™ใŽใ‚‹ใจใ ใ‚ใซใชใ‚‹ใฎใงใ€ใ”ๆณจๆ„ใใ ใ•ใ„", interactive=True)
175
+ # alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", interactive=True)
176
+ # beta = gr.Slider(minimum=0, maximum=1, value=0.4, step=0.1, label="Beta", interactive=True)
177
+ # with gr.Column(scale=1):
178
+ # clbtn = gr.Button("Synthesize", variant="primary")
179
+ # claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
180
+ # clbtn.click(clsynthesize, inputs=[clinp, voice, vcsteps, embscale, alpha, beta], outputs=[claudio], concurrency_limit=4)
181
  with gr.Blocks() as vctk:
182
  with gr.Row():
183
  with gr.Column(scale=1):
184
+ inp = gr.Textbox(label="Text", info="Enter the text | ใƒ†ใ‚ญใ‚นใƒˆใ‚’ๅ…ฅใ‚Œใฆใใ ใ•ใ„ใ€็Ÿญใ™ใŽใ‚‹ใจใฒใฉใใชใ‚Šใพใ™.", value="ใ‚ใชใŸใŒใ„ใชใ„ใจใ€ไธ–็•Œใฏ่‰ฒ่คชใ›ใฆ่ฆ‹ใˆใพใ™ใ€‚ใ‚ใชใŸใฎ็ฌ‘้ก”ใŒ็งใฎๆ—ฅใ€…ใ‚’ๆ˜Žใ‚‹ใ็…งใ‚‰ใ—ใฆใ„ใพใ™ใ€‚ใ‚ใชใŸใŒใ„ใชใ„ๆ—ฅใฏใ€ใพใ‚‹ใงๅ†ฌใฎใ‚ˆใ†ใซๅฏ’ใใ€ๆš—ใ„ใงใ™.", interactive=True)
185
+ voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
186
+ multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", interactive=True)
187
+ # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
 
 
188
  with gr.Column(scale=1):
189
+ btn = gr.Button("Synthesize", variant="primary")
190
+ audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
191
+ btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
192
+
193
+
194
+
195
+
196
+
197
  # with gr.Blocks() as clone:
198
  # with gr.Row():
199
  # with gr.Column(scale=1):