# Hugging Face Space: oItsMineZ / RVC-v2-WebUI (status: Running)
# File size: 55,664 Bytes

import os, sys
# Install pyworld with pip every time the script starts (the trailing note
# records a version that is not currently pinned).
os.system("pip install pyworld") # ==0.3.3

# Remember the launch directory and make it importable for project packages.
now_dir = os.getcwd()
sys.path.append(now_dir)
# Environment tweaks applied before the heavy libraries are imported below.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"

# Download models
shell_script = './tools/dlmodels.sh'
# Make the download script executable, install git-lfs and aria2 through apt,
# then fetch hubert_base.pt into the current directory with aria2c.
os.system(f'chmod +x {shell_script}')
os.system('apt install git-lfs')
os.system('git lfs install')
os.system('apt-get -y install aria2')
os.system('aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d . -o hubert_base.pt')
# Run the download script and report its exit status.
# NOTE(review): os.system reports command failure through its return value,
# so the except branch is unlikely to trigger for a failing script.
try:
    return_code = os.system(shell_script)
    if return_code == 0:
        print("Shell script executed successfully.")
    else:
        print(f"Shell script failed with return code {return_code}")
except Exception as e:
    print(f"An error occurred: {e}")

import logging
import shutil
import threading
import lib.globals.globals as rvc_globals
from LazyImport import lazyload
math = lazyload('math')
import traceback
import warnings
from random import shuffle
from subprocess import Popen
from time import sleep
import json
import pathlib

import fairseq
logging.getLogger("faiss").setLevel(logging.WARNING)
import faiss
gr = lazyload("gradio")
np = lazyload("numpy")
torch = lazyload('torch')
re = lazyload('regex')
SF = lazyload("soundfile")
SFWrite = SF.write
from dotenv import load_dotenv
from sklearn.cluster import MiniBatchKMeans
import datetime

from glob import glob1
import signal
from signal import SIGTERM
import librosa

from configs.config import Config

from infer.modules.vc.modules import VC
from infer.modules.vc.utils import *
from infer.modules.vc.pipeline import Pipeline
import lib.globals.globals as rvc_globals
math = lazyload('math')
ffmpeg = lazyload('ffmpeg')
import nltk
nltk.download('punkt', quiet=True)
from nltk.tokenize import sent_tokenize

import easy_infer
from infer.lib.csvutil import CSVutil

from lib.infer_pack.models import (
    SynthesizerTrnMs256NSFsid,
    SynthesizerTrnMs256NSFsid_nono,
    SynthesizerTrnMs768NSFsid,
    SynthesizerTrnMs768NSFsid_nono,
)
from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
from infer.lib.audio import load_audio

from sklearn.cluster import MiniBatchKMeans

import time
import csv

from shlex import quote as SQuote

RQuote = lambda val: SQuote(str(val))

# Working paths, all relative to the launch directory.
tmp = os.path.join(now_dir, "TEMP")
runtime_dir = os.path.join(now_dir, "runtime/Lib/site-packages")
directories = ['logs', 'audios', 'datasets', 'weights', 'audio-others' , 'audio-outputs']

# Start from a clean TEMP folder and drop any infer_pack copy under runtime/;
# missing paths are ignored.
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)

# Recreate TEMP and make sure every working folder exists.
os.makedirs(tmp, exist_ok=True)
for folder in directories:
    os.makedirs(os.path.join(now_dir, folder), exist_ok=True)

# NOTE(review): TEMP and logs were already created above; these two calls are
# harmless repeats thanks to exist_ok=True.
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
# Point the TEMP environment variable at the project-local TEMP folder.
os.environ["TEMP"] = tmp
# Suppress all Python warnings, fix the torch RNG seed, and quiet numba logs.
warnings.filterwarnings("ignore")
torch.manual_seed(114514)
logging.getLogger("numba").setLevel(logging.WARNING)

logger = logging.getLogger(__name__)

# First run only: create csvdb/ with two empty CSV files. If the directory
# already exists the files are not touched (or recreated).
if not os.path.isdir("csvdb/"):
    os.makedirs("csvdb")
    frmnt, stp = open("csvdb/formanting.csv", "w"), open("csvdb/stop.csv", "w")
    frmnt.close()
    stp.close()

# NOTE: a `global` statement at module scope has no effect; these names are
# module-level simply because they are assigned below.
global DoFormant, Quefrency, Timbre

try:
    # Load the three formant settings via CSVutil in read mode.
    # NOTE(review): the exact return format of CSVutil is not visible here;
    # this code only requires that it unpacks into three values.
    DoFormant, Quefrency, Timbre = CSVutil("csvdb/formanting.csv", "r", "formanting")
    # Convert the textual flags "true"/"false" (any letter case) to booleans;
    # any other string is kept unchanged.
    DoFormant = (
        lambda DoFormant: True
        if DoFormant.lower() == "true"
        else (False if DoFormant.lower() == "false" else DoFormant)
    )(DoFormant)
except (ValueError, TypeError, IndexError):
    # Reading or unpacking failed: fall back to defaults and write them out.
    DoFormant, Quefrency, Timbre = False, 1.0, 1.0
    CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, Quefrency, Timbre)

# Load variables from a .env file into the environment, then build the project
# configuration and the voice-conversion object used by the functions below.
load_dotenv()
config = Config()
vc = VC(config)

# Rebinds `gr` (assigned from lazyload("gradio") earlier in this file) to the
# regular gradio module.
import gradio as gr

# When config.dml is set, replace fairseq's GradMultiply.forward with a version
# that stores the scale on ctx and returns a detached clone of the input.
# NOTE(review): presumably this targets DirectML devices — confirm against Config.
if config.dml == True:

    def forward_dml(ctx, x, scale):
        ctx.scale = scale
        res = x.clone().detach()
        return res

    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml

# GPU bookkeeping: only the device count is filled in here; the lists and the
# flag start empty/False.
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if_gpu_ok = False

# Module-level interruption flag, initialised to 0 (users are outside this section).
isinterrupted = 0

class ToolButton(gr.Button, gr.components.FormComponent):
    """Small button with single emoji as text, fits inside gradio forms"""

    def __init__(self, **kwargs):
        # Always use the "tool" variant; all other options are forwarded as given.
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        # Report the block name "button" rather than a name derived from this subclass.
        return "button"


# Placeholder for the HuBERT model; nothing is loaded at this point.
hubert_model = None
# Model and index roots come from the environment (os.getenv returns None when
# a variable is unset); the remaining roots are fixed relative folder names.
weight_root = os.getenv("weight_root")
index_root = os.getenv("index_root")
datasets_root = "datasets"
fshift_root = "formantshiftcfg"
audio_root = "audios"
audio_others_root = "audio-others"

# Audio file suffixes accepted by the scans below (compared without a leading dot).
sup_audioext = {'wav', 'mp3', 'flac', 'ogg', 'opus',
                'm4a', 'mp4', 'aac', 'alac', 'wma',
                'aiff', 'webm', 'ac3'}

# Every .pth/.onnx file found anywhere under weight_root.
names        = [os.path.join(root, file)
               for root, _, files in os.walk(weight_root)
               for file in files
               if file.endswith((".pth", ".onnx"))]

# Every .index file under index_root whose name does not contain "trained".
indexes_list = [os.path.join(root, name)
               for root, _, files in os.walk(index_root, topdown=False) 
               for name in files 
               if name.endswith(".index") and "trained" not in name]

# Files under audio_root whose name ends with one of the supported suffixes.
audio_paths  = [os.path.join(root, name)
               for root, _, files in os.walk(audio_root, topdown=False) 
               for name in files
               if name.endswith(tuple(sup_audioext))]

# Same scan for the audio-others folder.
audio_others_paths  = [os.path.join(root, name)
               for root, _, files in os.walk(audio_others_root, topdown=False) 
               for name in files
               if name.endswith(tuple(sup_audioext))]

# Alphabetically first model path, or '' when no model was found.
check_for_name = lambda: sorted(names)[0] if names else ''

# Voice list obtained once at import time from easy_infer.
set_edge_voice = easy_infer.get_edge_voice()

def update_tts_methods_voice(select_value):
    """Build the voice-dropdown update for the selected TTS method.

    Only "Edge-tts" is handled: the dropdown choices become the Edge voice
    list and the current value is cleared. Any other method returns None.
    """
    if select_value != "Edge-tts":
        return None
    return dict(choices=set_edge_voice, value="", __type__="update")

def update_dataset_list(name): # Don't Remove
    """Return a Dropdown update listing the dataset folders.

    Entries of the datasets directory whose name contains no "." are treated
    as dataset folders. ``name`` is accepted but not used.
    """
    datasets_dir = os.path.join(now_dir, datasets_root)
    dataset_choices = [
        os.path.join(easy_infer.find_folder_parent(".", "pretrained"), "datasets", entry)
        for entry in os.listdir(datasets_dir)
        if "." not in entry
    ]
    return gr.Dropdown.update(choices=dataset_choices)

def get_indexes():
    """Collect the usable .index files under ``index_root``.

    Files whose name contains "trained" are skipped. Returns the list of
    paths, or '' when nothing was found.
    """
    found = []
    for dirpath, _, filenames in os.walk(index_root):
        for filename in filenames:
            if not filename.endswith(".index") or "trained" in filename:
                continue
            found.append(os.path.join(dirpath, filename))

    return found or ''

def get_fshift_presets():
    """Collect the formant-shift preset files (.txt) under ``fshift_root``.

    Returns the list of paths, or '' when nothing was found.
    """
    presets = []
    for dirpath, _, filenames in os.walk(fshift_root):
        for filename in filenames:
            if filename.endswith(".txt"):
                presets.append(os.path.join(dirpath, filename))

    return presets or ''

import soundfile as sf

def generate_output_path(output_folder, base_name, extension):
    """Return the first unused ``<base_name>_<n>.<extension>`` path in a folder.

    Numbering starts at 1 and increases until a path that does not exist yet
    is found.
    """
    counter = 1
    candidate = os.path.join(output_folder, f"{base_name}_{counter}.{extension}")
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(output_folder, f"{base_name}_{counter}.{extension}")
    return candidate

def change_choices():
    """Rescan models, indexes and input audios and return dropdown updates.

    Returns three update dicts, in order: model files (.pth/.onnx under
    ``weight_root``), index files (.index without "trained" under
    ``index_root``), and every entry of the "audios" folder.
    """
    model_files = []
    for root, _, files in os.walk(weight_root):
        for file in files:
            if file.endswith((".pth", ".onnx")):
                model_files.append(os.path.join(root, file))

    index_files = []
    for root, _, files in os.walk(index_root, topdown=False):
        for name in files:
            if name.endswith(".index") and "trained" not in name:
                index_files.append(os.path.join(root, name))

    audio_dir = os.path.join(now_dir, "audios")
    audio_files = [os.path.join(audio_root, entry) for entry in os.listdir(audio_dir)]

    def as_update(items):
        return {"choices": sorted(items), "__type__": "update"}

    return (as_update(model_files), as_update(index_files), as_update(audio_files))
def change_choices2():
    """Rescan models and indexes and return their dropdown updates.

    Returns two update dicts, in order: model files (.pth/.onnx under
    ``weight_root``) and index files (.index without "trained" under
    ``index_root``).
    """
    model_files = []
    for root, _, files in os.walk(weight_root):
        for file in files:
            if file.endswith((".pth", ".onnx")):
                model_files.append(os.path.join(root, file))

    index_files = []
    for root, _, files in os.walk(index_root, topdown=False):
        for name in files:
            if name.endswith(".index") and "trained" not in name:
                index_files.append(os.path.join(root, name))

    def as_update(items):
        return {"choices": sorted(items), "__type__": "update"}

    return (as_update(model_files), as_update(index_files))
def change_choices3():
    """Relist both audio folders and return their dropdown updates.

    Returns two update dicts: first the entries of "audio-others", then the
    entries of "audios".
    """
    main_dir = os.path.join(now_dir, "audios")
    main_audios = [os.path.join(audio_root, entry) for entry in os.listdir(main_dir)]

    others_dir = os.path.join(now_dir, "audio-others")
    other_audios = [os.path.join(audio_others_root, entry) for entry in os.listdir(others_dir)]

    def as_update(items):
        return {"choices": sorted(items), "__type__": "update"}

    return (as_update(other_audios), as_update(main_audios))

def clean():
    """Return an update dict that resets a component's value to an empty string."""
    return dict(value="", __type__="update")

def if_done(done, p):
    """Block until process ``p`` has exited, then set ``done[0]`` to True.

    ``p.poll()`` is checked every half second; it returns None while the
    process is still running.
    """
    while p.poll() is None:
        sleep(0.5)
    done[0] = True


def if_done_multi(done, ps):
    """Block until every process in ``ps`` has exited, then set ``done[0]``.

    The processes are polled in order; as soon as one is still running the
    scan stops, waits half a second and starts again from the first process.
    """
    while any(proc.poll() is None for proc in ps):
        sleep(0.5)
    done[0] = True

def formant_enabled(
    cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button
):
    """Persist the formant-shift toggle and show or hide its controls.

    Args:
        cbox: Checkbox state; truthy enables formant shifting.
        qfrency: Quefrency value stored alongside the flag.
        tmbre: Timbre value stored alongside the flag.
        frmntapply, formantpreset, formant_refresh_button: Accepted so the
            signature mirrors the bound components; not used here.

    Returns:
        A 6-tuple of update dicts: the checkbox value followed by five
        visibility updates, all set to the enabled state.

    Both branches now return six updates. Previously the disabled branch
    returned seven, so the two branches could not both match one fixed
    list of output components.
    """
    enabled = bool(cbox)
    # The CSV file is the persisted source of truth for the formant settings.
    CSVutil("csvdb/formanting.csv", "w+", "formanting", enabled, qfrency, tmbre)

    return (
        {"value": enabled, "__type__": "update"},
        *({"visible": enabled, "__type__": "update"} for _ in range(5)),
    )
        

def formant_apply(qfrency, tmbre):
    """Store the given quefrency/timbre with formant shifting enabled.

    Writes the settings to csvdb/formanting.csv and returns two update dicts
    carrying the quefrency and the timbre values, in that order.
    """
    CSVutil("csvdb/formanting.csv", "w+", "formanting", True, qfrency, tmbre)

    return tuple(
        {"value": setting, "__type__": "update"} for setting in (qfrency, tmbre)
    )

def update_fshift_presets(preset, qfrency, tmbre):
    """Refresh the preset list and, if a preset is selected, apply it.

    Args:
        preset: Path of the selected preset file, or a falsy value when
            nothing is selected.
        qfrency: Current quefrency value, kept when no preset is selected.
        tmbre: Current timbre value, kept when no preset is selected.

    Returns:
        Three update dicts: the refreshed preset choices, the quefrency
        value and the timbre value.

    Raises:
        OSError: If the preset file cannot be opened.
        IndexError: If the preset file has fewer than two lines.
    """
    if preset:
        with open(preset, 'r') as p:
            content = p.readlines()
        # Line 1 holds the quefrency, line 2 the timbre; drop the line endings
        # so neither value carries a stray newline.
        qfrency, tmbre = content[0].strip(), content[1].strip()
        formant_apply(qfrency, tmbre)
    # With no preset selected the incoming values are passed through as-is.
    # The earlier code routed this case through preset_apply and wrapped its
    # update dicts inside new update dicts.

    return (
        {"choices": get_fshift_presets(), "__type__": "update"},
        {"value": qfrency, "__type__": "update"},
        {"value": tmbre, "__type__": "update"},
    )

global log_interval

def set_log_interval(exp_dir, batch_size12):
    """Derive a log interval from the number of wavs in ``<exp_dir>/1_16k_wavs``.

    Returns 1 when the folder is missing or holds no ``*.wav`` file.
    Otherwise the interval is ceil(wav_count / batch_size12), increased by
    one when that result is greater than 1.
    """
    wav_dir = os.path.join(exp_dir, "1_16k_wavs")
    if not os.path.isdir(wav_dir):
        return 1

    wav_count = len(glob1(wav_dir, "*.wav"))
    if wav_count <= 0:
        return 1

    interval = math.ceil(wav_count / batch_size12)
    return interval + 1 if interval > 1 else interval

global PID, PROCESS

import re as regex
import scipy.io.wavfile as wavfile

cli_current_page = "HOME"

def cli_split_command(com):
    """Split a command line into tokens, keeping double-quoted text together.

    A quoted section counts as one token (without its quotes) when it starts
    at the beginning of the string or after whitespace and is followed by
    whitespace or the end of the string; everything else is split on
    whitespace.
    """
    token_pattern = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)'
    tokens = []
    for quoted, bare in regex.findall(token_pattern, com):
        tokens.append(quoted or bare)
    return tokens


def execute_generator_function(genObject):
    """Run an iterable to exhaustion, discarding everything it yields."""
    exhausted = object()
    iterator = iter(genObject)
    while next(iterator, exhausted) is not exhausted:
        pass

def preset_apply(preset, qfer, tmbr):
    """Load quefrency/timbre from a preset file and apply them.

    Args:
        preset: Path of the preset file. ``None`` or an empty string means
            no preset is selected, and the incoming values are kept.
        qfer: Current quefrency value.
        tmbr: Current timbre value.

    Returns:
        Two update dicts carrying the resulting quefrency and timbre.

    Raises:
        OSError: If the preset file cannot be opened.
        IndexError: If the preset file has fewer than two lines.
    """
    # None used to be stringified to "None" and opened as a file name.
    if preset is not None and str(preset) != "":
        with open(str(preset), "r") as p:
            content = p.readlines()
        # Line 1 is the quefrency, line 2 the timbre; cut both at the newline
        # (the timbre previously kept its trailing line ending).
        qfer, tmbr = content[0].split("\n")[0], content[1].split("\n")[0]
        formant_apply(qfer, tmbr)
    return (
        {"value": qfer, "__type__": "update"},
        {"value": tmbr, "__type__": "update"},
    )

def change_page(page):
    """Record ``page`` as the current CLI page and return 0."""
    global cli_current_page
    cli_current_page = page
    return 0

def switch_pitch_controls(f0method0):
    """Return visibility updates for the min/max pitch controls.

    The pitch controls are hidden entirely for the 'rmvpe' method. For any
    other method, ``rvc_globals.NotesOrHertz`` selects which pair is shown:
    truthy shows the text boxes, falsy shows the sliders.

    Returns four update dicts in the order: min slider, min textbox,
    max slider, max textbox.
    """
    show_pitch = f0method0 != 'rmvpe'

    if rvc_globals.NotesOrHertz:
        slider_visible, textbox_visible = False, show_pitch
    else:
        slider_visible, textbox_visible = show_pitch, False

    def visibility(flag):
        return {"visible": flag, "__type__": "update"}

    return (
        visibility(slider_visible),
        visibility(textbox_visible),
        visibility(slider_visible),
        visibility(textbox_visible),
    )

def match_index(sid0):
    """Find the .index file that belongs to the selected voice model.

    The folder name is the model name up to its first "." and then up to its
    first "_", looked up under ./logs/. The first .index file listed in that
    folder is used.

    Args:
        sid0: Selected model name/path. ``None`` is treated as no selection.

    Returns:
        ``(index_path, index_path)`` for the first .index file found, or
        ``("", "")`` when the folder is missing or holds no .index file.
        The second case used to fall through and return None.
    """
    no_match = ("", "")
    if sid0 is None:
        return no_match

    folder = sid0.split(".")[0].split("_")[0]
    parent_dir = "./logs/" + folder
    if not os.path.exists(parent_dir):
        return no_match

    lowered_dir = "./logs/" + folder.lower()
    for filename in os.listdir(parent_dir.replace("\\", "/")):
        if not filename.endswith(".index"):
            continue

        exact_path = os.path.join(parent_dir, filename).replace("\\", "/")
        lowered_path = os.path.join(lowered_dir, filename).replace("\\", "/")
        # Prefer the spelling already present in the scanned index list: if the
        # list holds the lower-cased folder variant, report that one.
        for known in indexes_list:
            if known == exact_path:
                break
            if known == lowered_path:
                parent_dir = lowered_dir
                break

        index_path = os.path.join(parent_dir, filename).replace("\\", "/")
        return (index_path, index_path)

    return no_match

weights_dir = 'weights/'

def note_to_hz(note_name):
    """Convert a note name such as "A4" or "C#5" to its frequency in Hz.

    The last character is the octave digit and everything before it is the
    pitch class (sharps only). A4 is the 440 Hz reference, and each semitone
    multiplies the frequency by 2 ** (1/12).
    """
    pitch_classes = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    # Semitone distance of every pitch class from A: C is -9 ... B is +2.
    offsets_from_a = dict(zip(pitch_classes, range(-9, 3)))

    pitch_class = note_name[:-1]
    octave = int(note_name[-1])
    note_number = 12 * (octave - 4) + offsets_from_a[pitch_class]

    semitone_ratio = 2.0 ** (1.0/12)
    return 440.0 * semitone_ratio ** note_number

def save_to_wav(record_button):
    """Move a recorded audio file into ./audios under a timestamped name.

    ``record_button`` is the path of the recording, or None when there is
    nothing to save (in which case None is returned). The file is renamed to
    ``YYYY-MM-DD_HH-MM-SS.wav`` and that file name, without the folder, is
    returned.
    """
    if record_button is None:
        return None

    new_name = f"{datetime.datetime.now():%Y-%m-%d_%H-%M-%S}.wav"
    shutil.move(record_button, './audios/' + new_name)
    return new_name
        
def save_to_wav2_edited(dropbox):
    """Move an uploaded file into the audios folder, replacing a same-named file.

    ``dropbox`` is an upload object exposing the file path as ``.name``, or
    None when nothing was uploaded. Always returns None.
    """
    if dropbox is None:
        return

    uploaded = dropbox.name
    destination = os.path.join('audios', os.path.basename(uploaded))

    if os.path.exists(destination):
        os.remove(destination)
        print('Replacing old dropdown file...')

    shutil.move(uploaded, destination)
    return
    
def save_to_wav2(dropbox):
    """Move an uploaded file into the audios folder and return its new path.

    ``dropbox`` is an upload object exposing the file path as ``.name``. An
    existing file with the same name in ``audios`` is removed first.
    """
    uploaded = dropbox.name
    destination = os.path.join('audios', os.path.basename(uploaded))

    already_there = os.path.exists(destination)
    if already_there:
        os.remove(destination)
        print('Replacing old dropdown file...')

    shutil.move(uploaded, destination)
    return destination
    
from gtts import gTTS
import edge_tts
import asyncio

def change_choices_fix():
    """Relist ./audios and preselect the most recently created audio file.

    Returns an update dict whose choices are the sorted audio paths (with
    forward slashes) and whose value is the path with the greatest ctime,
    or "" when the folder has no audio file.
    """
    audio_suffixes = ('wav', 'mp3', 'flac', 'ogg', 'opus',
                      'm4a', 'mp4', 'aac', 'alac', 'wma',
                      'aiff', 'webm', 'ac3')
    audio_paths = [
        os.path.join('./audios', filename).replace('\\', '/')
        for filename in os.listdir("./audios")
        if filename.endswith(audio_suffixes)
    ]
    print(audio_paths)

    most_recent_audio = max(audio_paths, key=os.path.getctime) if audio_paths else ""
    return {"choices": sorted(audio_paths), "value": most_recent_audio, "__type__": "update"}

def custom_voice(
        _values, # filter indices
        audio_files, # all audio files
        model_voice_path='',
        transpose=0,
        f0method='pm',
        index_rate_=float(0.66),
        crepe_hop_length_=float(64),
        f0_autotune=False,
        file_index='',
        file_index2='',
        ):
    """Convert the selected audio files with a voice model, overwriting them.

    The model at ``model_voice_path`` is loaded once. Each entry of
    ``_values`` is either an index into ``audio_files`` (the file is then
    read from and written back to ``audio2/<name>``) or the marker
    "converted_tts" (``audio_files[0]`` is then used as-is). The converted
    audio replaces the input file on disk. Returns None.
    """
    vc.get_vc(model_voice_path)

    for selection in _values:
        if selection != "converted_tts":
            target_path = "audio2/" + audio_files[selection]
        else:
            target_path = audio_files[0]

        # Best-effort progress output; indexing fails for the "converted_tts"
        # marker and that failure is deliberately ignored.
        try:
            print(audio_files[selection], model_voice_path)
        except:
            pass

        conversion = vc.vc_single_dont_save(
            sid=0,
            input_audio_path0=target_path,
            input_audio_path1=target_path,
            f0_up_key=transpose,
            f0_file=None,
            f0_method=f0method,
            file_index=file_index,
            file_index2=file_index2,
            index_rate=index_rate_,
            filter_radius=3,
            resample_sr=0,
            rms_mix_rate=0.25,
            protect=0.33,
            crepe_hop_length=crepe_hop_length_,
            f0_autotune=f0_autotune,
            f0_min=50,
            note_min=50,
            f0_max=1100,
            note_max=1100
        )
        _info, (sample_rate, converted_audio) = conversion

        sf.write(
            file=target_path,
            samplerate=sample_rate,
            data=converted_audio
        )

def make_test(
        tts_text,
        tts_voice,
        model_path,
        index_path,
        transpose,
        f0_method,
        index_rate,
        crepe_hop_length,
        f0_autotune,
        tts_method
        ):
    """Synthesize ``tts_text`` and convert the speech with a voice model.

    Args:
        tts_text: Text to speak (cut to 60 characters when the DEMO
            environment variable equals "SET_LIMIT").
        tts_voice: Voice identifier; its first two characters are used as
            the gTTS language code. ``None`` aborts and returns None.
        model_path: Voice model passed to ``custom_voice``.
        index_path: Index file passed to ``custom_voice`` as ``file_index2``.
        transpose, f0_method, index_rate, crepe_hop_length, f0_autotune:
            Conversion settings forwarded to ``custom_voice``.
        tts_method: Only "Edge-tts" is handled.

    Returns:
        ``(converted_path, original_tts_path)`` for "Edge-tts"; None when
        ``tts_voice`` is None or for any other ``tts_method``.
    """
    import shutil  # local import keeps this block self-contained

    if tts_voice is None:
        return

    output_dir = os.path.join(now_dir, "audio-outputs")
    filename = os.path.join(output_dir, "converted_tts.wav")
    original_copy = os.path.join(output_dir, "real_tts.wav")

    if "SET_LIMIT" == os.getenv("DEMO"):
        if len(tts_text) > 60:
            tts_text = tts_text[:60]
            print("DEMO; limit to 60 characters")

    language = tts_voice[:2]
    if tts_method == "Edge-tts":
        try:
            # The Edge voice name is the identifier without its last "-" part.
            asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save(filename))
        except Exception:
            # Edge TTS failed: fall back to gTTS, and as a last resort to a
            # one-letter placeholder clip so later steps still have a file.
            try:
                tts = gTTS(tts_text, lang=language)
                tts.save(filename)
                print(f'No audio was received. Please change the tts voice for {tts_voice}. USING gTTS.')
            except Exception:
                tts = gTTS('a', lang=language)
                tts.save(filename)
                print('Error: Audio will be replaced.')

        # Keep the unconverted speech next to the converted result. A direct
        # file copy replaces the shell `cp`, so a failure is raised here
        # instead of being ignored.
        shutil.copyfile(filename, original_copy)

        custom_voice(
            ["converted_tts"], # filter indices
            ["audio-outputs/converted_tts.wav"], # all audio files
            model_voice_path=model_path,
            transpose=transpose,
            f0method=f0_method,
            index_rate_=index_rate,
            crepe_hop_length_=crepe_hop_length,
            f0_autotune=f0_autotune,
            file_index='',
            file_index2=index_path,
        )
        return filename, original_copy

def_text = "อย่าลืมที่จะกดไลค์ และกดซับสะไค้ร์ช่องโออิสไมซี เพื่อไม่พลาดมีมใหม่ๆเวลาอัพโหลด"
def_index = "logs/DaengGuitar/added_IVF473_Flat_nprobe_1_daengguitar_v2.index"

def GradioSetup(UTheme=gr.themes.Soft()):

    default_weight = names[0] if names else '' 

    with gr.Blocks(title="oItsMinez's RVC v2 WebUI", theme=gr.themes.Base(font=[gr.themes.GoogleFont("Noto Sans Thai"), "sans-serif"])) as app:
        gr.Label('oItsMineZ\'s RVC v2 WebUI', show_label=False)
        gr.Markdown(
            "<div align='center'>\n\n"+
            "RVC v2 Model"+
            "[![oItsMineZ's RVC Model](https://img.shields.io/badge/%F0%9F%A4%97_Hugging_Face-_oItsMineZ's%20RVC%20%20Model-yellow?style=for-the-badge&logoColor=yellow)](https://huggingface.co/oItsMineZ/oItsMineZ-RVC-Model)\n\n"+
            "ติดตาม oItsMineZ"+
            "[![oItsMineZ on YouTube](https://img.shields.io/badge/YouTube-FF0000?style=for-the-badge&logo=youtube&logoColor=white)](https://www.youtube.com/@oItsMineZ?sub_confirmation=1)"+
            "</div>"
        )
        with gr.Tabs():
            with gr.TabItem("Info"):
                gr.Markdown("## 📌แนะนำให้โคลน Space นี้ไว้ในบัญชีของคุณ เพื่อการใช้งานที่ดียิ่งขึ้น (ต้องสมัครบัญชี Hugging Face ก่อน)")
                gr.Markdown("[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/oItsMineZ/RVC-v2-WebUI?duplicate=true)\n\n") 
            
                gr.HTML("<b><h2> 📄ข้อควรรู้ </h2></b>")
                gr.Markdown("- RVC v2 (Retrieval Based Voice Conversion v2) เป็น AI Voice Model ที่ปรับปรุงมาจาก VITS ที่ทำให้เทรนโมเดลได้ง่ายขึ้น และคุณภาพของเสียงดีขึ้น")
                gr.Markdown("- WebUI นี้ใช้สำหรับเฉพาะ **เสียง Vocal หรือ TTS** เท่านั้น! ถ้าอยากใช้ AI Cover เฉพาให้ใช้ [**ตัวนี้แทน**](https://huggingface.co/spaces/oItsMineZ/RVC-v2-AI-Cover-WebUI)")
                gr.Markdown("- ถ้าอยากแยกเสียงร้องกับเสียงดนตรีออกจากเพลง [**(ให้แยกได้ที่นี่)**](https://huggingface.co/spaces/oItsMineZ/Ultimate-Vocal-Remover-WebUI) แล้วค่อยนำไฟล์ Vocal มาอัพโหลดในนี้")
                gr.Markdown("- ถ้าใช้ในโทรศัพท์ **ห้าม**ออกจากหน้า Web ขณะเว็บกำลังดำเนินการอยู่ เพราะทำให้ไฟล์หายระหว่างขั้นตอนได้")

                gr.HTML("<b><h2> ✨ฟีเจอร์ </h2></b>")
                gr.Markdown("- อัปโหลดไฟล์ Vocal หรือใช้ TTS (Text to Speech) แปลงข้อความเป็นเสียงได้เลย")
                gr.Markdown("- สามารถดาวน์โหลด Model อื่นๆ ได้ที่แท็บ Resources [**(เว็บสำหรับหา Model เพิ่มเติม)**](https://voice-models.com)")
                gr.Markdown("- ที่สำคัญ **อย่าลืม** *Refresh Model* ทุกครั้งเมื่อโหลด Model ใหม่เข้ามา")

                gr.HTML("<b><h2> 📋รายชื่อ Model </h2></b>")
                gr.Markdown("- อาจารย์แดง (DaengGuitar) - 500 Epochs")
                gr.Markdown("- เต้ (TAEEXZENFIRE) - 500 Epochs")
                gr.Markdown("- ท่านศาสดา - 50 Epochs")
                gr.Markdown("- Model ใหม่เร็วๆ นี้ 🤫")

                gr.HTML("<b><h2> 🌐WebUI อื่นๆ </h2></b>")
                gr.Markdown("- AI Cover (เพลงที่มีทำนอง)")
                gr.Markdown("[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%8E%A4%EF%B8%8F_Space-_RVC%20v2%20AI%20Cover%20WebUI-red?style=for-the-badge)](https://huggingface.co/spaces/oItsMineZ/RVC-v2-AI-Cover-WebUI)")

                gr.HTML("<b><h2> ❤️ขอขอบคุณ </h2></b>")
                gr.Markdown("- [**@r3gm**](https://huggingface.co/r3gm) for [***Ultimate Vocal Remover WebUI***](https://huggingface.co/spaces/r3gm/Ultimate-Vocal-Remover-WebUI) and [***RVC Inference HF***](https://huggingface.co/spaces/r3gm/RVC_HFv2)")
            
            with gr.TabItem("RVC Conversion"):
                with gr.Row():
                    sid0 = gr.Dropdown(label="Inferencing voice:", choices=sorted(names), value=default_weight)
                    refresh_button = gr.Button("Refresh", variant="primary")
                    clean_button = gr.Button("Unload voice to save GPU memory", variant="primary")
                    clean_button.click(fn=lambda: ({"value": "", "__type__": "update"}), inputs=[], outputs=[sid0])

                with gr.TabItem("Main Options"):
                    with gr.Row(): 
                        spk_item = gr.Slider(
                            minimum=0,
                            maximum=2333,
                            step=1,
                            label="Select Speaker/Singer ID:",
                            value=0,
                            visible=False,
                            interactive=True,
                        )

                    with gr.Group(): 
                        with gr.Row():
                            with gr.Column(): # First column for audio-related inputs
                                dropbox = gr.File(label="Drag your audio here:")
                                record_button=gr.Audio(source="microphone", type="filepath", label="Or record an audio:")
                                input_audio0 = gr.Textbox(
                                    label="Manual path to the audio file to be processed",
                                    value=os.path.join(now_dir, "audios", "someguy.mp3"),
                                    visible=False
                                )
                                input_audio1 = gr.Dropdown(
                                    label="Auto detect audio path and select from the dropdown:",
                                    choices=sorted(audio_paths),
                                    value='',
                                    interactive=True,
                                )
                                
                                input_audio1.select(fn=lambda:'',inputs=[],outputs=[input_audio0])
                                input_audio0.input(fn=lambda:'',inputs=[],outputs=[input_audio1])
                                
                                dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0]).then(fn=change_choices_fix, inputs=[], outputs=[input_audio1])
                                record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0]).then(fn=change_choices_fix, inputs=[], outputs=[input_audio1])

                            best_match_index_path1 = match_index(sid0.value) # Get initial index from default sid0 (first voice model in list)

                            with gr.Column(): # Second column for pitch shift and other options
                                file_index2 = gr.Dropdown(
                                    label="Auto-detect index path and select from the dropdown (**เลือกให้ตรงกับ Model ที่เลือกไว้**):",
                                    choices=get_indexes(),
                                    value=def_index,
                                    interactive=True,
                                    allow_custom_value=True,
                                )
                                index_rate1 = gr.Slider(
                                    minimum=0,
                                    maximum=1,
                                    label="Search feature ratio:",
                                    value=0.75,
                                    interactive=True,
                                )
                                refresh_button.click(
                                    fn=change_choices, inputs=[], outputs=[sid0, file_index2, input_audio1]
                                )
                                with gr.Column():
                                    vc_transform0 = gr.Number(
                                        label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):", value=0
                                    )
        
                    # Create a checkbox for advanced settings
                    advanced_settings_checkbox = gr.Checkbox(
                        value=False,
                        label="Advanced Settings",
                        interactive=True,
                    )

                    # Advanced settings container        
                    with gr.Column(visible=False) as advanced_settings: # Initially hidden
                        with gr.Row(label = "Advanced Settings", open = False):
                            with gr.Column():
                                f0method0 = gr.Radio(
                                    label="Select the pitch extraction algorithm:",
                                    choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe", "rmvpe+"], 
                                    value="rmvpe+",
                                    interactive=True,
                                )
                                f0_autotune = gr.Checkbox(
                                    label="Enable autotune",
                                    interactive=True
                                )
                                crepe_hop_length = gr.Slider(
                                    minimum=1,
                                    maximum=512,
                                    step=1,
                                    label="Mangio-Crepe Hop Length (Only applies to mangio-crepe): Hop length refers to the time it takes for the speaker to jump to a dramatic pitch. Lower hop lengths take more time to infer but are more pitch accurate.",
                                    value=120,
                                    interactive=True,
                                    visible=False,
                                )
                                filter_radius0 = gr.Slider(
                                    minimum=0,
                                    maximum=7,
                                    label="If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.",
                                    value=3,
                                    step=1,
                                    interactive=True,
                                )    

                                minpitch_slider = gr.Slider(
                                    label       = "Min pitch:",
                                    info        = "Specify minimal pitch for inference [HZ]",
                                    step        = 0.1,
                                    minimum     = 1,
                                    scale       = 0,
                                    value       = 50,
                                    maximum     = 16000,
                                    interactive = True,
                                    visible     = (not rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
                                )
                                minpitch_txtbox = gr.Textbox(
                                    label       = "Min pitch:",
                                    info        = "Specify minimal pitch for inference [NOTE][OCTAVE]",
                                    placeholder = "C5",
                                    visible     = (rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
                                    interactive = True,
                                )

                                maxpitch_slider = gr.Slider(
                                    label       = "Max pitch:",
                                    info        = "Specify max pitch for inference [HZ]",
                                    step        = 0.1,
                                    minimum     = 1,
                                    scale       = 0,
                                    value       = 1100,
                                    maximum     = 16000,
                                    interactive = True,
                                    visible     = (not rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
                                )
                                maxpitch_txtbox = gr.Textbox(
                                    label       = "Max pitch:",
                                    info        = "Specify max pitch for inference [NOTE][OCTAVE]",
                                    placeholder = "C6",
                                    visible     = (rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
                                    interactive = True,
                                )

                            with gr.Column():
                                file_index1 = gr.Textbox(
                                    label="Feature search database file path:",
                                    value="",
                                    interactive=True,
                                )
                            
                                with gr.Accordion(label = "Custom f0 [Root pitch] File", open = False):
                                    f0_file = gr.File(label="F0 curve file (optional). One pitch per line. Replaces the default F0 and pitch modulation:")

                            f0method0.change(
                                fn=lambda radio: (
                                    {
                                        "visible": radio in ['mangio-crepe', 'mangio-crepe-tiny'],
                                        "__type__": "update"
                                    }
                                ),
                                inputs=[f0method0],
                                outputs=[crepe_hop_length]
                            )

                            f0method0.change(
                                fn=switch_pitch_controls,
                                inputs=[f0method0],
                                outputs=[minpitch_slider, minpitch_txtbox,
                                         maxpitch_slider, maxpitch_txtbox]
                            )                            
                            
                            with gr.Column():
                                resample_sr0 = gr.Slider(
                                    minimum=0,
                                    maximum=48000,
                                    label="Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling:",
                                    value=0,
                                    step=1,
                                    interactive=True,
                                )
                                rms_mix_rate0 = gr.Slider(
                                    minimum=0,
                                    maximum=1,
                                    label="Use the volume envelope of the input to replace or mix with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is used:",
                                    value=0.25,
                                    interactive=True,
                                )
                                protect0 = gr.Slider(
                                    minimum=0,
                                    maximum=0.5,
                                    label="Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy:",
                                    value=0.33,
                                    step=0.01,
                                    interactive=True,
                                )
                                formanting = gr.Checkbox(
                                    value=bool(DoFormant),
                                    label="Formant shift inference audio",
                                    info="Used for male to female and vice-versa conversions",
                                    interactive=True,
                                    visible=True,
                                )
                                
                                formant_preset = gr.Dropdown(
                                    value='',
                                    choices=get_fshift_presets(),
                                    label="Browse presets for formanting",
                                    info="Presets are located in formantshiftcfg/ folder",
                                    visible=bool(DoFormant),
                                )
                                
                                formant_refresh_button = gr.Button(
                                    value='\U0001f504',
                                    visible=bool(DoFormant),
                                    variant='primary',
                                )
                                
                                qfrency = gr.Slider(
                                        value=Quefrency,
                                        info="Default value is 1.0",
                                        label="Quefrency for formant shifting",
                                        minimum=0.0,
                                        maximum=16.0,
                                        step=0.1,
                                        visible=bool(DoFormant),
                                        interactive=True,
                                )
                                    
                                tmbre = gr.Slider(
                                    value=Timbre,
                                    info="Default value is 1.0",
                                    label="Timbre for formant shifting",
                                    minimum=0.0,
                                    maximum=16.0,
                                    step=0.1,
                                    visible=bool(DoFormant),
                                    interactive=True,
                                )
                                frmntbut = gr.Button(
                                   "Apply", variant="primary", visible=bool(DoFormant)
                                )
                               
                            formant_preset.change(
                                fn=preset_apply,
                                inputs=[formant_preset, qfrency, tmbre],
                                outputs=[qfrency, tmbre],
                            )
                            formanting.change(
                               fn=formant_enabled,
                               inputs=[
                                   formanting,
                                   qfrency,
                                   tmbre,
                                   frmntbut,
                                   formant_preset,
                                   formant_refresh_button,
                               ],
                               outputs=[
                                   formanting,
                                   qfrency,
                                   tmbre,
                                   frmntbut,
                                   formant_preset,
                                   formant_refresh_button,
                               ],
                            )
                            frmntbut.click(
                                fn=formant_apply,
                                inputs=[qfrency, tmbre],
                                outputs=[qfrency, tmbre],
                            )
                            formant_refresh_button.click(
                                fn=update_fshift_presets,
                                inputs=[formant_preset, qfrency, tmbre],
                                outputs=[formant_preset, qfrency, tmbre],
                            )

                    # Function to toggle advanced settings
                    def toggle_advanced_settings(checkbox):
                        """Build the update payload for the advanced-settings container.

                        Args:
                            checkbox: Current value of the checkbox that triggers the event.

                        Returns:
                            A dict with ``visible`` set to ``checkbox`` and ``__type__``
                            set to ``"update"``.
                        """
                        payload = {"visible": checkbox}
                        payload["__type__"] = "update"
                        return payload

                    # Attach the change event
                    advanced_settings_checkbox.change(
                        fn=toggle_advanced_settings,
                        inputs=[advanced_settings_checkbox],
                        outputs=[advanced_settings]
                    )

                    but0 = gr.Button("Convert", variant="primary").style(full_width=True)
                    
                    with gr.Row(): # Defines output info + output audio download after conversion
                        vc_output1 = gr.Textbox(label="Output information:")
                        vc_output2 = gr.Audio(label="Export audio (click on the three dots in the lower right corner to download)")
                        
                    with gr.Group(): # I think this defines the big convert button
                        with gr.Row():
                            but0.click(
                                vc.vc_single,
                                [
                                    spk_item,
                                    input_audio0,
                                    input_audio1,
                                    vc_transform0,
                                    f0_file,
                                    f0method0,
                                    file_index1,
                                    file_index2,
                                    index_rate1,
                                    filter_radius0,
                                    resample_sr0,
                                    rms_mix_rate0,
                                    protect0,
                                    crepe_hop_length,
                                    minpitch_slider, minpitch_txtbox,
                                    maxpitch_slider, maxpitch_txtbox,
                                    f0_autotune
                                ],
                                [vc_output1, vc_output2],
                            )

                    with gr.Group(visible=False): # Markdown explanation of batch inference
                        with gr.Row():
                            with gr.Column():
                                vc_transform1 = gr.Number(value=0)
                                opt_input = gr.Textbox(value="opt")
                            with gr.Column():
                                file_index4 = gr.Dropdown(
                                    choices=get_indexes(),
                                    value=best_match_index_path1,
                                    interactive=True,
                                )
                                sid0.select(fn=match_index, inputs=[sid0], outputs=[file_index2, file_index4])

                                refresh_button.click(
                                    fn=lambda: change_choices()[1],
                                    inputs=[],
                                    outputs=file_index4,
                                )
                                index_rate2 = gr.Slider(
                                    minimum=0,
                                    maximum=1,
                                    value=0.75,
                                    interactive=True,
                                )
                            with gr.Row():
                                dir_input = gr.Textbox(
                                    value=os.path.join(now_dir, "audios"),
                                )
                                inputs = gr.File(file_count="multiple")

                        with gr.Row():
                            with gr.Column():
                                advanced_settings_batch_checkbox = gr.Checkbox(
                                    value=False,
                                    interactive=True,
                                )
                            
                                # Advanced batch settings container        
                                with gr.Row(visible=False) as advanced_settings_batch: # Initially hidden
                                    with gr.Row(label = "Advanced Settings", open = False):
                                        with gr.Column():
                                            file_index3 = gr.Textbox(
                                                value="",
                                                interactive=True,
                                            )

                                    f0method1 = gr.Radio(
                                        choices=["pm", "harvest", "crepe", "rmvpe"],
                                        value="rmvpe",
                                        interactive=True,
                                    )
                                    f0_autotune = gr.Checkbox(
                                    label="Enable autotune",
                                    interactive=True
                                    )
                                    filter_radius1 = gr.Slider(
                                        minimum=0,
                                        maximum=7,
                                        value=3,
                                        step=1,
                                        interactive=True,
                                    )
                                
                                    with gr.Row():
                                        format1 = gr.Radio(
                                            choices=["wav", "flac", "mp3", "m4a"],
                                            value="wav",
                                            interactive=True,
                                        )
                                        

                                    with gr.Column():
                                        resample_sr1 = gr.Slider(
                                            minimum=0,
                                            maximum=48000,
                                            value=0,
                                            step=1,
                                            interactive=True,
                                        )
                                        rms_mix_rate1 = gr.Slider(
                                            minimum=0,
                                            maximum=1,
                                            value=1,
                                            interactive=True,
                                        )
                                        protect1 = gr.Slider(
                                            minimum=0,
                                            maximum=0.5,
                                            value=0.33,
                                            step=0.01,
                                            interactive=True,
                                        )
                                vc_output3 = gr.Textbox(label="Output information:")
                                but1 = gr.Button("Convert", variant="primary")
                                but1.click(
                                    vc.vc_multi,
                                    [
                                        spk_item,
                                        dir_input,
                                        opt_input,
                                        inputs,
                                        vc_transform1,
                                        f0method1,
                                        file_index3,
                                        file_index4,
                                        index_rate2,
                                        filter_radius1,
                                        resample_sr1,
                                        rms_mix_rate1,
                                        protect1,
                                        format1,
                                        crepe_hop_length,
                                        minpitch_slider if (not rvc_globals.NotesOrHertz) else minpitch_txtbox,
                                        maxpitch_slider if (not rvc_globals.NotesOrHertz) else maxpitch_txtbox,
                                        f0_autotune
                                    ],
                                    [vc_output3],
                                )

                    sid0.change(
                        fn=vc.get_vc,
                        inputs=[sid0, protect0, protect1],
                        outputs=[spk_item, protect0, protect1],
                    )
                    if not sid0.value == '':
                        spk_item, protect0, protect1 = vc.get_vc(sid0.value, protect0, protect1)


                    # Function to toggle advanced settings
                    def toggle_advanced_settings_batch(checkbox):
                        """Build the update payload for the batch advanced-settings container.

                        Args:
                            checkbox: Current value of the checkbox that triggers the event.

                        Returns:
                            A dict with ``visible`` set to ``checkbox`` and ``__type__``
                            set to ``"update"``.
                        """
                        visibility_update = dict(visible=checkbox)
                        visibility_update["__type__"] = "update"
                        return visibility_update

                    # Attach the change event
                    advanced_settings_batch_checkbox.change(
                        fn=toggle_advanced_settings_batch,
                        inputs=[advanced_settings_batch_checkbox],
                        outputs=[advanced_settings_batch]
                    )                           
                                       
                    with gr.Accordion(label="f0method8", visible=False): #Don't Remove
                        with gr.Row():
                            with gr.Column():
                                f0method8 = gr.Radio(
                                    choices=["pm", "harvest", "dio", "crepe", "mangio-crepe", "rmvpe", "rmvpe_gpu"],
                                    value="rmvpe",
                                    interactive=True,
                                )

            with gr.TabItem("TTS"):
                with gr.Group():
                    with gr.Column():
                        text_test = gr.Textbox(label="Text:", placeholder="Enter the text you want to convert to voice...", value=def_text, lines=6)
                            
                with gr.Group():

                    with gr.Column():
                        model_voice_path07 = gr.Dropdown(label='RVC Model:', choices=sorted(names), value=default_weight)
                        best_match_index_path1 = match_index(model_voice_path07.value)    

                        file_index2_07 = gr.Dropdown(
                            label='Select the .index file (**เลือกให้ตรงกับ Model ที่เลือกไว้**):',
                            choices=get_indexes(),
                            value=def_index,
                            interactive=True,
                            allow_custom_value=True,
                            )

                    with gr.Row(): 
                        with gr.Column(): 
                             tts_methods_voice = ["Edge-tts"]
                             ttsmethod_test = gr.Dropdown(tts_methods_voice, value='Edge-tts', label = 'TTS Method:', visible=False)
                             tts_test = gr.Dropdown(set_edge_voice, label = 'TTS Language:', value='th-TH-NiwatNeural-Male', visible=True)
                             ttsmethod_test.change(
                            fn=update_tts_methods_voice,
                            inputs=ttsmethod_test,
                            outputs=tts_test,
                            )
                
                with gr.Row():
                        refresh_button_ = gr.Button("Refresh", variant="primary")
                        refresh_button_.click(fn=change_choices2, inputs=[], outputs=[model_voice_path07, file_index2_07])
                with gr.Row():
                            original_ttsvoice = gr.Audio(label='Audio TTS:')
                            ttsvoice = gr.Audio(label='Audio RVC:')

                with gr.Row():
                        button_test = gr.Button("Convert", variant="primary")

                button_test.click(make_test, inputs=[
                                text_test,
                                tts_test,
                                model_voice_path07,
                                file_index2_07,
                                vc_transform0,
                                f0method8,
                                index_rate1,
                                crepe_hop_length,
                                f0_autotune,
                                ttsmethod_test
                                ], outputs=[ttsvoice, original_ttsvoice])
            
            with gr.TabItem("Resources"):
                gr.Markdown(f"Limit Download Size is {os.getenv('MAX_DOWNLOAD_SIZE')} MB, duplicate the space for modify the limit")
                easy_infer.download_model()
                easy_infer.download_audio()
                # https://huggingface.co/oItsMineZ/oItsMineZ-RVC-Model/resolve/main/DaengGuitar/DaengGuitar.zip

            with gr.TabItem("Settings"):
                with gr.Row():
                    gr.Markdown(value="Pitch settings")
                    noteshertz = gr.Checkbox(
                        label       = "Whether to use note names instead of their hertz value. E.G. [C5, D6] instead of [523.25, 1174.66]Hz",
                        value       = rvc_globals.NotesOrHertz,
                        interactive = True,
                    )
            
            noteshertz.change(fn=lambda nhertz: rvc_globals.__setattr__('NotesOrHertz', nhertz), inputs=[noteshertz], outputs=[])

            noteshertz.change(
                fn=switch_pitch_controls,
                inputs=[f0method0],
                outputs=[
                    minpitch_slider, minpitch_txtbox,
                    maxpitch_slider, maxpitch_txtbox,]
            )
        return app

def GradioRun(app):
    """Enable request queuing on the app and launch it.

    Args:
        app: The application object returned by ``GradioSetup``; it must
            provide ``queue(...)`` returning an object with ``launch()``.

    NOTE: The launch settings are the same in every environment. The
    previous Colab/Paperspace branch ran the identical call in both arms and
    the computed share flag was never passed to ``launch()``, so both were
    removed without changing behaviour.
    """
    # Queue limits passed straight to ``app.queue``.
    concurrency_count = 511
    max_size = 1022

    app.queue(concurrency_count=concurrency_count, max_size=max_size).launch()

if __name__ == "__main__":
    # Script entry point: build the UI with the theme from the config object,
    # then hand it to GradioRun, which queues and launches it.
    app = GradioSetup(UTheme=config.grtheme)
    GradioRun(app)