|
import os |
|
import noisereduce as nr |
|
import soundfile as sf |
|
|
|
import string |
|
import json |
|
from glob import glob |
|
import torchaudio |
|
import subprocess |
|
import shutil |
|
import pyloudnorm as pyln |
|
import torch |
|
from TTS.api import TTS |
|
import string |
|
|
|
def remove_punctuation(sentence):
    """Strip ASCII punctuation from *sentence* and flatten its line breaks.

    Every character listed in ``string.punctuation`` is deleted, each
    newline becomes a single space, and carriage returns are dropped.

    Args:
        sentence: The text to clean.

    Returns:
        The cleaned text.
    """
    # A single translation table covers all three rules in one pass. The
    # affected character sets are disjoint, so the outcome is the same as
    # applying the rules one after another.
    table = {ord(ch): None for ch in string.punctuation}
    table[ord('\n')] = ' '
    table[ord('\r')] = None
    return sentence.translate(table)
|
|
|
|
|
def _spell_out_acronyms(text, acronyms):
    """Return *text* with each whole-word acronym split into spaced letters."""
    import re  # function-scope import keeps this block self-contained

    # De-duplicate, then try longer acronyms first so "USA" is preferred
    # over its prefix "US" at the same position.
    ordered = sorted(dict.fromkeys(acronyms), key=len, reverse=True)
    if not ordered:
        return text

    # One pass over the text: already-expanded output is never re-scanned,
    # and the lookarounds restrict matches to whole words so an acronym is
    # not split out of the middle of a longer word.
    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(word) for word in ordered) + r")(?!\w)"
    )
    return pattern.sub(lambda match: " ".join(match.group()), text)


def run_audio_generation_v1(new_text, accent='None'):
    """Synthesize *new_text* to a WAV file with XTTS v2 voice cloning.

    The function:

    1. Replaces newlines in the text with spaces, drops carriage returns,
       and rewrites every all-uppercase word of two or more characters as
       space-separated letters (``"NASA"`` -> ``"N A S A"``), presumably so
       the model reads acronyms letter by letter.
    2. Writes the model names reported by ``TTS().list_models()`` to
       ``models.txt`` in the current working directory.
    3. Noise-reduces ``./tmp/audio/input_src/0.wav`` and stores the result
       as 16-bit PCM in ``./tmp/audio/speaker_wav.wav``.
    4. Generates English speech for the text, using the cleaned clip as the
       speaker reference, into ``./tmp/audio/generated-custom.wav``.

    Args:
        new_text: The text to speak.
        accent: Currently unused; kept so existing callers keep working.

    Returns:
        None. The results are the files written to disk.
    """
    new_text = new_text.replace('\n', ' ').replace('\r', '')

    # Acronyms are detected on a punctuation-free copy so that "USA." or
    # "(NASA)" still count as all-uppercase words.
    candidate_words = remove_punctuation(new_text).split()
    acronyms = [
        word for word in candidate_words
        if len(word) >= 2 and word.isupper()
    ]
    new_text = _spell_out_acronyms(new_text, acronyms)

    # Record the available model names next to the script.
    models = TTS().list_models()
    with open('models.txt', 'w') as f:
        f.writelines(f"{model}\n" for model in models)

    gpu = torch.cuda.is_available()
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu)

    # Denoise the reference clip before handing it to the model.
    # NOTE(review): sf.read yields (frames, channels) for multi-channel
    # files while reduce_noise documents (channels, frames) -- confirm the
    # reference clip is mono.
    speaker_wav_data, speaker_wav_rate = sf.read("./tmp/audio/input_src/0.wav")
    speaker_wav_data_no_noise = nr.reduce_noise(
        y=speaker_wav_data, sr=speaker_wav_rate
    )
    sf.write(
        './tmp/audio/speaker_wav.wav',
        speaker_wav_data_no_noise,
        speaker_wav_rate,
        subtype='PCM_16',
    )

    tts.tts_to_file(
        new_text,
        speaker_wav="./tmp/audio/speaker_wav.wav",
        language="en",
        file_path="./tmp/audio/generated-custom.wav",
    )