thinhlpg committed on
Commit
376b5d9
·
1 Parent(s): c837795

chores: more clean up

Browse files
Files changed (1) hide show
  1. app.py +20 -67
app.py CHANGED
@@ -11,31 +11,28 @@ os.system("python -m unidic download")
11
 
12
  import csv
13
  import datetime
 
14
  import re
 
 
15
  from io import StringIO
16
 
17
  import gradio as gr
18
-
19
- # langid is used to detect language for longer text
20
- # Most users expect text to be their own language, there is checkbox to disable it
21
- import langid
22
- from huggingface_hub import hf_hub_download, snapshot_download
23
- from TTS.api import TTS
24
  from TTS.tts.configs.xtts_config import XttsConfig
25
  from TTS.tts.models.xtts import Xtts
26
- from underthesea import sent_tokenize
27
- from unidecode import unidecode
28
  from vinorm import TTSnorm
29
 
30
- HF_TOKEN = os.environ.get("HF_TOKEN")
31
-
32
- from huggingface_hub import HfApi
33
 
34
- # will use api to restart space on an unrecoverable error
35
  api = HfApi(token=HF_TOKEN)
36
 
37
  # This will trigger downloading model
38
- print("Downloading if not downloaded Coqui XTTS V2")
39
  checkpoint_dir = "model/"
40
  repo_id = "capleaf/viXTTS"
41
  use_deepspeed = False
@@ -154,13 +151,7 @@ def predict(
154
  )
155
  gr.Warning("Unhandled Exception encounter, please retry in a minute")
156
  print("Cuda device-assert Runtime encountered need restart")
157
- if not DEVICE_ASSERT_DETECTED:
158
- DEVICE_ASSERT_DETECTED = 1
159
- DEVICE_ASSERT_PROMPT = prompt
160
- DEVICE_ASSERT_LANG = language
161
 
162
- # just before restarting save what caused the issue so we can handle it in future
163
- # Uploading Error data only happens for unrecoverable error
164
  error_time = datetime.datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
165
  error_data = [
166
  error_time,
@@ -212,59 +203,28 @@ def predict(
212
  )
213
  else:
214
  print("RuntimeError: non device-side assert error:", str(e))
215
- gr.Warning("Something unexpected happened please retry again.")
216
- return (
217
- None,
218
- None,
219
- None,
220
- None,
221
- )
222
- return (
223
- gr.make_waveform(
224
- audio="output.wav",
225
- ),
226
- "output.wav",
227
- metrics_text,
228
- speaker_wav,
229
- )
230
 
231
 
232
  title = "viXTTS Demo"
233
 
234
- description = """
235
-
236
- <br/>
237
-
238
- This demo is currently running **XTTS v2.0.3** <a href="https://huggingface.co/coqui/XTTS-v2">XTTS</a> is a multilingual text-to-speech and voice-cloning model. This demo features zero-shot voice cloning, however, you can fine-tune XTTS for better results. Leave a star 🌟 on Github <a href="https://github.com/coqui-ai/TTS">🐸TTS</a>, where our open-source inference and training code lives.
239
-
240
- <br/>
241
-
242
- Supported languages: Arabic: ar, Brazilian Portuguese: pt , Mandarin Chinese: zh-cn, Czech: cs, Dutch: nl, English: en, French: fr, German: de, Italian: it, Polish: pl, Russian: ru, Spanish: es, Turkish: tr, Japanese: ja, Korean: ko, Hungarian: hu, Hindi: hi
243
-
244
- <br/>
245
- """
246
-
247
-
248
- article = """
249
-
250
- """
251
 
252
  with gr.Blocks(analytics_enabled=False) as demo:
253
  with gr.Row():
254
  with gr.Column():
255
  gr.Markdown(
256
  """
257
- 😳 Burh
258
  """
259
  )
260
  with gr.Column():
261
  # placeholder to align the image
262
  pass
263
 
264
- with gr.Row():
265
- with gr.Column():
266
- gr.Markdown(description)
267
-
268
  with gr.Row():
269
  with gr.Column():
270
  input_text_gr = gr.Textbox(
@@ -304,19 +264,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
304
  type="filepath",
305
  value="model/samples/nu-luu-loat.wav",
306
  )
307
- mic_gr = gr.Audio(
308
- source="microphone",
309
- type="filepath",
310
- info="Use your microphone to record audio",
311
- label="Use Microphone for Reference",
312
- )
313
  tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
314
 
315
  with gr.Column():
316
- video_gr = gr.Video(label="Waveform Visual")
317
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
318
  out_text_gr = gr.Text(label="Metrics")
319
- ref_audio_gr = gr.Audio(label="Reference Audio Used")
320
 
321
  tts_button.click(
322
  predict,
@@ -324,10 +276,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
324
  input_text_gr,
325
  language_gr,
326
  ref_gr,
327
- mic_gr,
328
  ],
329
- outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr],
 
330
  )
331
 
332
  demo.queue()
333
- demo.launch(debug=True, show_api=True)
 
11
 
12
  import csv
13
  import datetime
14
+ import os
15
  import re
16
+ import time
17
+ import uuid
18
  from io import StringIO
19
 
20
  import gradio as gr
21
+ import torch
22
+ import torchaudio
23
+ from huggingface_hub import HfApi, hf_hub_download, snapshot_download
 
 
 
24
  from TTS.tts.configs.xtts_config import XttsConfig
25
  from TTS.tts.models.xtts import Xtts
 
 
26
  from vinorm import TTSnorm
27
 
28
+ # download for mecab
29
+ # os.system("python -m unidic download")
 
30
 
31
+ HF_TOKEN = os.environ.get("HF_TOKEN")
32
  api = HfApi(token=HF_TOKEN)
33
 
34
  # This will trigger downloading model
35
+ print("Downloading if not downloaded viXTTS")
36
  checkpoint_dir = "model/"
37
  repo_id = "capleaf/viXTTS"
38
  use_deepspeed = False
 
151
  )
152
  gr.Warning("Unhandled Exception encounter, please retry in a minute")
153
  print("Cuda device-assert Runtime encountered need restart")
 
 
 
 
154
 
 
 
155
  error_time = datetime.datetime.now().strftime("%d-%m-%Y-%H:%M:%S")
156
  error_data = [
157
  error_time,
 
203
  )
204
  else:
205
  print("RuntimeError: non device-side assert error:", str(e))
206
+ metrics_text = gr.Warning(
207
+ "Something unexpected happened please retry again."
208
+ )
209
+ return (None, metrics_text)
210
+ return ("output.wav", metrics_text)
 
 
 
 
 
 
 
 
 
 
211
 
212
 
213
  title = "viXTTS Demo"
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
  with gr.Blocks(analytics_enabled=False) as demo:
217
  with gr.Row():
218
  with gr.Column():
219
  gr.Markdown(
220
  """
221
+ viXTTS Demo
222
  """
223
  )
224
  with gr.Column():
225
  # placeholder to align the image
226
  pass
227
 
 
 
 
 
228
  with gr.Row():
229
  with gr.Column():
230
  input_text_gr = gr.Textbox(
 
264
  type="filepath",
265
  value="model/samples/nu-luu-loat.wav",
266
  )
 
 
 
 
 
 
267
  tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
268
 
269
  with gr.Column():
 
270
  audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
271
  out_text_gr = gr.Text(label="Metrics")
 
272
 
273
  tts_button.click(
274
  predict,
 
276
  input_text_gr,
277
  language_gr,
278
  ref_gr,
279
+ normalize_text,
280
  ],
281
+ outputs=[audio_gr, out_text_gr],
282
+ api_name="predict",
283
  )
284
 
285
  demo.queue()
286
+ demo.launch(debug=True, show_api=True, share=True)