Update app.py
app.py CHANGED
@@ -2,6 +2,7 @@
 import os
 import logging
 import re_matching
+import spaces
 from tools.sentence import split_by_language
 
 logging.getLogger("numba").setLevel(logging.WARNING)
@@ -35,7 +36,7 @@ device = config.webui_config.device
 if device == "mps":
     os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 
-
+@spaces.GPU
 def generate_audio(
     slices,
     sdp_ratio,
@@ -79,7 +80,7 @@ def generate_audio(
         audio_list.append(audio16bit)
     return audio_list
 
-
+@spaces.GPU
 def generate_audio_multilang(
     slices,
     sdp_ratio,
@@ -119,7 +120,7 @@ def generate_audio_multilang(
         audio_list.append(audio16bit)
     return audio_list
 
-
+@spaces.GPU
 def tts_split(
     text: str,
     speaker,
@@ -191,7 +192,7 @@ def tts_split(
     audio_concat = np.concatenate(audio_list)
     return ("Success", (hps.data.sampling_rate, audio_concat))
 
-
+@spaces.GPU
 def process_mix(slice):
     _speaker = slice.pop()
     _text, _lang = [], []
@@ -211,7 +212,7 @@ def process_mix(slice):
         _lang += [[lang] for part in content[1:]]
     return _text, _lang, _speaker
 
-
+@spaces.GPU
 def process_auto(text):
     _text, _lang = [], []
     for slice in text.split("|"):
@@ -228,7 +229,7 @@ def process_auto(text):
         _lang.append(temp_lang)
     return _text, _lang
 
-
+@spaces.GPU
 def process_text(
     text: str,
     speaker,
@@ -303,7 +304,7 @@ def process_text(
         )
     return audio_list
 
-
+@spaces.GPU
 def tts_fn(
     text: str,
     speaker,
@@ -345,7 +346,7 @@ def tts_fn(
     audio_concat = np.concatenate(audio_list)
     return "Success", (hps.data.sampling_rate, audio_concat)
 
-
+@spaces.GPU
 def format_utils(text, speaker):
     _text, _lang = process_auto(text)
     res = f"[{speaker}]"
@@ -355,13 +356,13 @@ def format_utils(text, speaker):
         res += "|"
     return "mix", res[:-1]
 
-
+@spaces.GPU
 def load_audio(path):
     audio, sr = librosa.load(path, 48000)
     # audio = librosa.resample(audio, 44100, 48000)
     return sr, audio
 
-
+@spaces.GPU
 def gr_util(item):
     if item == "Text prompt":
         return {"visible": True, "__type__": "update"}, {
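
For context, spaces.GPU comes from Hugging Face's spaces package and backs ZeroGPU Spaces: a GPU is attached to the process only while a decorated function executes, and the decorator is a no-op when the app runs outside ZeroGPU hardware. Below is a minimal sketch of the pattern this commit applies; the echo_device function and the Gradio wiring are hypothetical stand-ins for app.py's inference entry points such as tts_fn, not code from this repository.

import gradio as gr
import spaces
import torch

@spaces.GPU  # a GPU is attached only for the duration of each call
def echo_device(text: str) -> str:
    # On ZeroGPU hardware, CUDA becomes visible inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"{text} (ran on {device})"

demo = gr.Interface(fn=echo_device, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

For long-running synthesis, the decorator also accepts a duration hint, e.g. @spaces.GPU(duration=120), to request more than the default 60-second slice.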
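One pre-existing detail in the unchanged context: load_audio calls librosa.load(path, 48000) with the target sample rate as a positional argument. Recent librosa releases (0.10 and later) make sr keyword-only, so on those versions the call would need to be librosa.load(path, sr=48000); this commit leaves that line as it was.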
|