import os, sys

# NOTE(review): dependencies and model files are installed/downloaded at import
# time through the shell; every command below is best-effort (os.system does
# not raise on a non-zero exit status).
os.system("pip install pyworld") # ==0.3.3
now_dir = os.getcwd()
sys.path.append(now_dir)
# Environment tweaks must be in place before the heavy libraries are imported.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"

# Download models
shell_script = './tools/dlmodels.sh'
os.system(f'chmod +x {shell_script}')
os.system('apt install git-lfs')
os.system('git lfs install')
os.system('apt-get -y install aria2')
os.system('aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d . -o hubert_base.pt')
try:
    return_code = os.system(shell_script)
    if return_code == 0:
        print("Shell script executed successfully.")
    else:
        print(f"Shell script failed with return code {return_code}")
except Exception as e:
    print(f"An error occurred: {e}")

# The import order below is kept exactly as in the original: several imports
# are interleaved with statements that must run first (e.g. the faiss logger
# level is set before faiss is imported).
import logging
import shutil
import threading
import lib.globals.globals as rvc_globals
from LazyImport import lazyload
math = lazyload('math')
import traceback
import warnings
from random import shuffle
from subprocess import Popen
from time import sleep
import json
import pathlib
import fairseq
logging.getLogger("faiss").setLevel(logging.WARNING)
import faiss
gr = lazyload("gradio")
np = lazyload("numpy")
torch = lazyload('torch')
re = lazyload('regex')
SF = lazyload("soundfile")
SFWrite = SF.write
from dotenv import load_dotenv
from sklearn.cluster import MiniBatchKMeans
import datetime
from glob import glob1
import signal
from signal import SIGTERM
import librosa
from configs.config import Config
from infer.modules.vc.modules import VC
from infer.modules.vc.utils import *
from infer.modules.vc.pipeline import Pipeline
import lib.globals.globals as rvc_globals
math = lazyload('math')
ffmpeg = lazyload('ffmpeg')
import nltk
nltk.download('punkt', quiet=True)
from nltk.tokenize import sent_tokenize
import easy_infer
from infer.lib.csvutil import CSVutil
from lib.infer_pack.models import (
    SynthesizerTrnMs256NSFsid,
    SynthesizerTrnMs256NSFsid_nono,
    SynthesizerTrnMs768NSFsid,
    SynthesizerTrnMs768NSFsid_nono,
)
from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
from infer.lib.audio import load_audio
from sklearn.cluster import MiniBatchKMeans
import time
import csv
from shlex import quote as SQuote

# Shell-quote any value after converting it to a string.
RQuote = lambda val: SQuote(str(val))

tmp = os.path.join(now_dir, "TEMP")
runtime_dir = os.path.join(now_dir, "runtime/Lib/site-packages")
directories = ['logs', 'audios', 'datasets', 'weights', 'audio-others' , 'audio-outputs']

# Start from a clean TEMP directory and drop any stale bundled infer_pack.
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
# 'logs' is part of `directories`, so it needs no separate makedirs call.
for folder in directories:
    os.makedirs(os.path.join(now_dir, folder), exist_ok=True)
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
os.environ["TEMP"] = tmp

warnings.filterwarnings("ignore")
torch.manual_seed(114514)
logging.getLogger("numba").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)

if not os.path.isdir("csvdb/"):
    os.makedirs("csvdb")
    # Create both state files empty. The context manager closes the handles
    # even when the second open() fails.
    with open("csvdb/formanting.csv", "w") as frmnt, open("csvdb/stop.csv", "w") as stp:
        pass

# A `global` statement at module scope has no effect; kept as in the original.
global DoFormant, Quefrency, Timbre

try:
    DoFormant, Quefrency, Timbre = CSVutil("csvdb/formanting.csv", "r", "formanting")
    # Map the stored text "true"/"false" (any case) to a bool; any other value
    # is left untouched.
    DoFormant = {"true": True, "false": False}.get(DoFormant.lower(), DoFormant)
except (ValueError, TypeError, IndexError):
    # Missing or malformed state: fall back to defaults and persist them.
    DoFormant, Quefrency, Timbre = False, 1.0, 1.0
    CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, Quefrency, Timbre)

load_dotenv()
config = Config()
vc = VC(config)

import gradio as gr

if config.dml == True:

    def forward_dml(ctx, x, scale):
        """Replacement forward for GradMultiply: store the scale, return a detached copy of x."""
        ctx.scale = scale
        res = x.clone().detach()
        return res

    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml

ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if_gpu_ok = False
isinterrupted = 0
class ToolButton(gr.Button, gr.components.FormComponent): """Small button with single emoji as text, fits inside gradio forms""" def __init__(self, **kwargs): super().__init__(variant="tool", **kwargs) def get_block_name(self): return "button" hubert_model = None weight_root = os.getenv("weight_root") index_root = os.getenv("index_root") datasets_root = "datasets" fshift_root = "formantshiftcfg" audio_root = "audios" audio_others_root = "audio-others" sup_audioext = {'wav', 'mp3', 'flac', 'ogg', 'opus', 'm4a', 'mp4', 'aac', 'alac', 'wma', 'aiff', 'webm', 'ac3'} names = [os.path.join(root, file) for root, _, files in os.walk(weight_root) for file in files if file.endswith((".pth", ".onnx"))] indexes_list = [os.path.join(root, name) for root, _, files in os.walk(index_root, topdown=False) for name in files if name.endswith(".index") and "trained" not in name] audio_paths = [os.path.join(root, name) for root, _, files in os.walk(audio_root, topdown=False) for name in files if name.endswith(tuple(sup_audioext))] audio_others_paths = [os.path.join(root, name) for root, _, files in os.walk(audio_others_root, topdown=False) for name in files if name.endswith(tuple(sup_audioext))] check_for_name = lambda: sorted(names)[0] if names else '' set_edge_voice = easy_infer.get_edge_voice() def update_tts_methods_voice(select_value): if select_value == "Edge-tts": return {"choices": set_edge_voice, "value": "", "__type__": "update"} def update_dataset_list(name): # Don't Remove new_datasets = [] for foldername in os.listdir(os.path.join(now_dir, datasets_root)): if "." 
not in foldername: new_datasets.append(os.path.join(easy_infer.find_folder_parent(".","pretrained"),"datasets",foldername)) return gr.Dropdown.update(choices=new_datasets) def get_indexes(): indexes_list = [ os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(index_root) for filename in filenames if filename.endswith(".index") and "trained" not in filename ] return indexes_list if indexes_list else '' def get_fshift_presets(): fshift_presets_list = [ os.path.join(dirpath, filename) for dirpath, _, filenames in os.walk(fshift_root) for filename in filenames if filename.endswith(".txt") ] return fshift_presets_list if fshift_presets_list else '' import soundfile as sf def generate_output_path(output_folder, base_name, extension): index = 1 while True: output_path = os.path.join(output_folder, f"{base_name}_{index}.{extension}") if not os.path.exists(output_path): return output_path index += 1 def change_choices(): names = [os.path.join(root, file) for root, _, files in os.walk(weight_root) for file in files if file.endswith((".pth", ".onnx"))] indexes_list = [os.path.join(root, name) for root, _, files in os.walk(index_root, topdown=False) for name in files if name.endswith(".index") and "trained" not in name] audio_paths = [os.path.join(audio_root, file) for file in os.listdir(os.path.join(now_dir, "audios"))] return ( {"choices": sorted(names), "__type__": "update"}, {"choices": sorted(indexes_list), "__type__": "update"}, {"choices": sorted(audio_paths), "__type__": "update"} ) def change_choices2(): names = [os.path.join(root, file) for root, _, files in os.walk(weight_root) for file in files if file.endswith((".pth", ".onnx"))] indexes_list = [os.path.join(root, name) for root, _, files in os.walk(index_root, topdown=False) for name in files if name.endswith(".index") and "trained" not in name] return ( {"choices": sorted(names), "__type__": "update"}, {"choices": sorted(indexes_list), "__type__": "update"}, ) def change_choices3(): audio_paths 
= [os.path.join(audio_root, file) for file in os.listdir(os.path.join(now_dir, "audios"))] audio_others_paths = [os.path.join(audio_others_root, file) for file in os.listdir(os.path.join(now_dir, "audio-others"))] return ( {"choices": sorted(audio_others_paths), "__type__": "update"}, {"choices": sorted(audio_paths), "__type__": "update"} ) def clean(): return {"value": "", "__type__": "update"} def if_done(done, p): while 1: if p.poll() is None: sleep(0.5) else: break done[0] = True def if_done_multi(done, ps): while 1: flag = 1 for p in ps: if p.poll() is None: flag = 0 sleep(0.5) break if flag == 1: break done[0] = True def formant_enabled( cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button ): if cbox: DoFormant = True CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, qfrency, tmbre) return ( {"value": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, ) else: DoFormant = False CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, qfrency, tmbre) return ( {"value": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, ) def formant_apply(qfrency, tmbre): Quefrency = qfrency Timbre = tmbre DoFormant = True CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, qfrency, tmbre) return ( {"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"}, ) def update_fshift_presets(preset, qfrency, tmbre): if preset: with open(preset, 'r') as p: content = p.readlines() qfrency, tmbre = content[0].strip(), content[1] formant_apply(qfrency, tmbre) else: qfrency, tmbre = preset_apply(preset, 
qfrency, tmbre) return ( {"choices": get_fshift_presets(), "__type__": "update"}, {"value": qfrency, "__type__": "update"}, {"value": tmbre, "__type__": "update"}, ) global log_interval def set_log_interval(exp_dir, batch_size12): log_interval = 1 folder_path = os.path.join(exp_dir, "1_16k_wavs") if os.path.isdir(folder_path): wav_files_num = len(glob1(folder_path,"*.wav")) if wav_files_num > 0: log_interval = math.ceil(wav_files_num / batch_size12) if log_interval > 1: log_interval += 1 return log_interval global PID, PROCESS import re as regex import scipy.io.wavfile as wavfile cli_current_page = "HOME" def cli_split_command(com): exp = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)' split_array = regex.findall(exp, com) split_array = [group[0] if group[0] else group[1] for group in split_array] return split_array def execute_generator_function(genObject): for _ in genObject: pass def preset_apply(preset, qfer, tmbr): if str(preset) != "": with open(str(preset), "r") as p: content = p.readlines() qfer, tmbr = content[0].split("\n")[0], content[1] formant_apply(qfer, tmbr) else: pass return ( {"value": qfer, "__type__": "update"}, {"value": tmbr, "__type__": "update"}, ) def change_page(page): global cli_current_page cli_current_page = page return 0 def switch_pitch_controls(f0method0): is_visible = f0method0 != 'rmvpe' if rvc_globals.NotesOrHertz: return ( {"visible": False, "__type__": "update"}, {"visible": is_visible, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": is_visible, "__type__": "update"} ) else: return ( {"visible": is_visible, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": is_visible, "__type__": "update"}, {"visible": False, "__type__": "update"} ) def match_index(sid0): picked = False folder = sid0.split(".")[0].split("_")[0] parent_dir = "./logs/" + folder if os.path.exists(parent_dir): for filename in os.listdir(parent_dir.replace("\\", "/")): if filename.endswith(".index"): for i in 
range(len(indexes_list)): if indexes_list[i] == ( os.path.join(("./logs/" + folder), filename).replace("\\", "/") ): break else: if indexes_list[i] == ( os.path.join( ("./logs/" + folder.lower()), filename ).replace("\\", "/") ): parent_dir = "./logs/" + folder.lower() break index_path = os.path.join( parent_dir.replace("\\", "/"), filename.replace("\\", "/") ).replace("\\", "/") return (index_path, index_path) else: return ("", "") weights_dir = 'weights/' def note_to_hz(note_name): SEMITONES = {'C': -9, 'C#': -8, 'D': -7, 'D#': -6, 'E': -5, 'F': -4, 'F#': -3, 'G': -2, 'G#': -1, 'A': 0, 'A#': 1, 'B': 2} pitch_class, octave = note_name[:-1], int(note_name[-1]) semitone = SEMITONES[pitch_class] note_number = 12 * (octave - 4) + semitone frequency = 440.0 * (2.0 ** (1.0/12)) ** note_number return frequency def save_to_wav(record_button): if record_button is None: pass else: path_to_file=record_button new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")+'.wav' new_path='./audios/'+new_name shutil.move(path_to_file,new_path) return new_name def save_to_wav2_edited(dropbox): if dropbox is None: pass else: file_path = dropbox.name target_path = os.path.join('audios', os.path.basename(file_path)) if os.path.exists(target_path): os.remove(target_path) print('Replacing old dropdown file...') shutil.move(file_path, target_path) return def save_to_wav2(dropbox): file_path = dropbox.name target_path = os.path.join('audios', os.path.basename(file_path)) if os.path.exists(target_path): os.remove(target_path) print('Replacing old dropdown file...') shutil.move(file_path, target_path) return target_path from gtts import gTTS import edge_tts import asyncio def change_choices_fix(): audio_paths=[] for filename in os.listdir("./audios"): if filename.endswith(('wav', 'mp3', 'flac', 'ogg', 'opus', 'm4a', 'mp4', 'aac', 'alac', 'wma', 'aiff', 'webm', 'ac3')): audio_paths.append(os.path.join('./audios',filename).replace('\\', '/')) print(audio_paths) most_recent_audio = "" if 
audio_paths: most_recent_audio = max(audio_paths, key=os.path.getctime) return {"choices": sorted(audio_paths), "value": most_recent_audio, "__type__": "update"} def custom_voice( _values, # filter indices audio_files, # all audio files model_voice_path='', transpose=0, f0method='pm', index_rate_=float(0.66), crepe_hop_length_=float(64), f0_autotune=False, file_index='', file_index2='', ): vc.get_vc(model_voice_path) for _value_item in _values: filename = "audio2/"+audio_files[_value_item] if _value_item != "converted_tts" else audio_files[0] try: print(audio_files[_value_item], model_voice_path) except: pass info_, (sample_, audio_output_) = vc.vc_single_dont_save( sid=0, input_audio_path0=filename, #f"audio2/{filename}", input_audio_path1=filename, #f"audio2/{filename}", f0_up_key=transpose, # transpose for m to f and reverse 0 12 f0_file=None, f0_method= f0method, file_index= file_index, # dir pwd? file_index2= file_index2, # file_big_npy1, index_rate= index_rate_, filter_radius= int(3), resample_sr= int(0), rms_mix_rate= float(0.25), protect= float(0.33), crepe_hop_length= crepe_hop_length_, f0_autotune=f0_autotune, f0_min=50, note_min=50, f0_max=1100, note_max=1100 ) sf.write( file= filename, #f"audio2/{filename}", samplerate=sample_, data=audio_output_ ) def make_test( tts_text, tts_voice, model_path, index_path, transpose, f0_method, index_rate, crepe_hop_length, f0_autotune, tts_method ): if tts_voice == None: return filename = os.path.join(now_dir, "audio-outputs", "converted_tts.wav") if "SET_LIMIT" == os.getenv("DEMO"): if len(tts_text) > 60: tts_text = tts_text[:60] print("DEMO; limit to 60 characters") language = tts_voice[:2] if tts_method == "Edge-tts": try: asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(filename)) except: try: tts = gTTS(tts_text, lang=language) tts.save(filename) tts.save print(f'No audio was received. Please change the tts voice for {tts_voice}. 
USING gTTS.') except: tts = gTTS('a', lang=language) tts.save(filename) print('Error: Audio will be replaced.') os.system("cp audio-outputs/converted_tts.wav audio-outputs/real_tts.wav") custom_voice( ["converted_tts"], # filter indices ["audio-outputs/converted_tts.wav"], # all audio files model_voice_path=model_path, transpose=transpose, f0method=f0_method, index_rate_=index_rate, crepe_hop_length_=crepe_hop_length, f0_autotune=f0_autotune, file_index='', file_index2=index_path, ) return os.path.join(now_dir, "audio-outputs", "converted_tts.wav"), os.path.join(now_dir, "audio-outputs", "real_tts.wav") def_text = "อย่าลืมที่จะกดไลค์ และกดซับสะไค้ร์ช่องโออิสไมซี เพื่อไม่พลาดมีมใหม่ๆเวลาอัพโหลด" def_index = "logs/DaengGuitar/added_IVF473_Flat_nprobe_1_daengguitar_v2.index" def GradioSetup(UTheme=gr.themes.Soft()): default_weight = names[0] if names else '' with gr.Blocks(title="oItsMinez's RVC v2 WebUI", theme=gr.themes.Base(font=[gr.themes.GoogleFont("Noto Sans Thai"), "sans-serif"])) as app: gr.Label('oItsMineZ\'s RVC v2 WebUI', show_label=False) gr.Markdown( "