oItsMineZ committed on
Commit
a986973
·
verified ·
1 Parent(s): ba753aa

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +1244 -0
app.py ADDED
@@ -0,0 +1,1244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, sys
2
+ os.system("pip install pyworld") # ==0.3.3
3
+
4
+ now_dir = os.getcwd()
5
+ sys.path.append(now_dir)
6
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
7
+ os.environ["OPENBLAS_NUM_THREADS"] = "1"
8
+ os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
9
+
10
+ # Download models
11
+ shell_script = './tools/dlmodels.sh'
12
+ os.system(f'chmod +x {shell_script}')
13
+ os.system('apt install git-lfs')
14
+ os.system('git lfs install')
15
+ os.system('apt-get -y install aria2')
16
+ os.system('aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d . -o hubert_base.pt')
17
+ try:
18
+ return_code = os.system(shell_script)
19
+ if return_code == 0:
20
+ print("Shell script executed successfully.")
21
+ else:
22
+ print(f"Shell script failed with return code {return_code}")
23
+ except Exception as e:
24
+ print(f"An error occurred: {e}")
25
+
26
+ import logging
27
+ import shutil
28
+ import threading
29
+ import lib.globals.globals as rvc_globals
30
+ from LazyImport import lazyload
31
+ math = lazyload('math')
32
+ import traceback
33
+ import warnings
34
+ from random import shuffle
35
+ from subprocess import Popen
36
+ from time import sleep
37
+ import json
38
+ import pathlib
39
+
40
+ import fairseq
41
+ logging.getLogger("faiss").setLevel(logging.WARNING)
42
+ import faiss
43
+ gr = lazyload("gradio")
44
+ np = lazyload("numpy")
45
+ torch = lazyload('torch')
46
+ re = lazyload('regex')
47
+ SF = lazyload("soundfile")
48
+ SFWrite = SF.write
49
+ from dotenv import load_dotenv
50
+ from sklearn.cluster import MiniBatchKMeans
51
+ import datetime
52
+
53
+ from glob import glob1
54
+ import signal
55
+ from signal import SIGTERM
56
+ import librosa
57
+
58
+ from configs.config import Config
59
+
60
+ from infer.modules.vc.modules import VC
61
+ from infer.modules.vc.utils import *
62
+ from infer.modules.vc.pipeline import Pipeline
63
+ import lib.globals.globals as rvc_globals
64
+ math = lazyload('math')
65
+ ffmpeg = lazyload('ffmpeg')
66
+ import nltk
67
+ nltk.download('punkt', quiet=True)
68
+ from nltk.tokenize import sent_tokenize
69
+
70
+ import easy_infer
71
+ from infer.lib.csvutil import CSVutil
72
+
73
+ from lib.infer_pack.models import (
74
+ SynthesizerTrnMs256NSFsid,
75
+ SynthesizerTrnMs256NSFsid_nono,
76
+ SynthesizerTrnMs768NSFsid,
77
+ SynthesizerTrnMs768NSFsid_nono,
78
+ )
79
+ from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
80
+ from infer.lib.audio import load_audio
81
+
82
+ from sklearn.cluster import MiniBatchKMeans
83
+
84
+ import time
85
+ import csv
86
+
87
+ from shlex import quote as SQuote
88
+
89
+ RQuote = lambda val: SQuote(str(val))
90
+
91
+ tmp = os.path.join(now_dir, "TEMP")
92
+ runtime_dir = os.path.join(now_dir, "runtime/Lib/site-packages")
93
+ directories = ['logs', 'audios', 'datasets', 'weights', 'audio-others' , 'audio-outputs']
94
+
95
+ shutil.rmtree(tmp, ignore_errors=True)
96
+ shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
97
+
98
+ os.makedirs(tmp, exist_ok=True)
99
+ for folder in directories:
100
+ os.makedirs(os.path.join(now_dir, folder), exist_ok=True)
101
+
102
+ os.makedirs(tmp, exist_ok=True)
103
+ os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
104
+ os.makedirs(os.path.join(now_dir, "assets/weights"), exist_ok=True)
105
+ os.environ["TEMP"] = tmp
106
+ warnings.filterwarnings("ignore")
107
+ torch.manual_seed(114514)
108
+ logging.getLogger("numba").setLevel(logging.WARNING)
109
+
110
+ logger = logging.getLogger(__name__)
111
+
112
# Create the CSV-backed settings store on first launch: the directory plus
# two empty files (formant settings and the stop flag).
if not os.path.isdir("csvdb/"):
    os.makedirs("csvdb")
    with open("csvdb/formanting.csv", "w") as frmnt, open("csvdb/stop.csv", "w") as stp:
        pass  # opening in "w" mode is enough to create the empty files

global DoFormant, Quefrency, Timbre

# Load the persisted formant settings; fall back to defaults (and persist
# them) when the stored row cannot be unpacked.
try:
    DoFormant, Quefrency, Timbre = CSVutil("csvdb/formanting.csv", "r", "formanting")
    # The flag is compared as text: "true"/"false" (any casing) become real
    # booleans, anything else is kept exactly as read.
    if DoFormant.lower() == "true":
        DoFormant = True
    elif DoFormant.lower() == "false":
        DoFormant = False
except (ValueError, TypeError, IndexError):
    DoFormant, Quefrency, Timbre = False, 1.0, 1.0
    CSVutil("csvdb/formanting.csv", "w+", "formanting", DoFormant, Quefrency, Timbre)
130
+
131
+ load_dotenv()
132
+ config = Config()
133
+ vc = VC(config)
134
+
135
+ if config.dml == True:
136
+
137
+ def forward_dml(ctx, x, scale):
138
+ ctx.scale = scale
139
+ res = x.clone().detach()
140
+ return res
141
+
142
+ fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
143
+
144
+ ngpu = torch.cuda.device_count()
145
+ gpu_infos = []
146
+ mem = []
147
+ if_gpu_ok = False
148
+
149
+ isinterrupted = 0
150
+
151
class ToolButton(gr.Button, gr.components.FormComponent):
    """Compact button (meant to carry a single emoji) that fits inside gradio forms."""

    def __init__(self, **kwargs):
        # Always request the "tool" variant; every other option is forwarded.
        super(ToolButton, self).__init__(variant="tool", **kwargs)

    def get_block_name(self):
        """Return the block name this component reports: ``"button"``."""
        block_name = "button"
        return block_name
159
+
160
+
161
+ hubert_model = None
162
+ weight_root = os.getenv("weight_root")
163
+ index_root = os.getenv("index_root")
164
+ datasets_root = "datasets"
165
+ fshift_root = "formantshiftcfg"
166
+ audio_root = "audios"
167
+ audio_others_root = "audio-others"
168
+
169
+ sup_audioext = {'wav', 'mp3', 'flac', 'ogg', 'opus',
170
+ 'm4a', 'mp4', 'aac', 'alac', 'wma',
171
+ 'aiff', 'webm', 'ac3'}
172
+
173
+ names = [os.path.join(root, file)
174
+ for root, _, files in os.walk(weight_root)
175
+ for file in files
176
+ if file.endswith((".pth", ".onnx"))]
177
+
178
+ indexes_list = [os.path.join(root, name)
179
+ for root, _, files in os.walk(index_root, topdown=False)
180
+ for name in files
181
+ if name.endswith(".index") and "trained" not in name]
182
+
183
+ audio_paths = [os.path.join(root, name)
184
+ for root, _, files in os.walk(audio_root, topdown=False)
185
+ for name in files
186
+ if name.endswith(tuple(sup_audioext))]
187
+
188
+ audio_others_paths = [os.path.join(root, name)
189
+ for root, _, files in os.walk(audio_others_root, topdown=False)
190
+ for name in files
191
+ if name.endswith(tuple(sup_audioext))]
192
+
193
+ check_for_name = lambda: sorted(names)[0] if names else ''
194
+
195
+ set_edge_voice = easy_infer.get_edge_voice()
196
+
197
def update_tts_methods_voice(select_value):
    """Build the voice-dropdown update for the selected TTS method.

    Returns an update dict listing ``set_edge_voice`` (with the value
    cleared) when ``select_value`` is ``"Edge-tts"``; returns ``None`` for
    any other value.
    """
    if select_value != "Edge-tts":
        return None
    return {"choices": set_edge_voice, "value": "", "__type__": "update"}
200
+
201
def update_dataset_list(name):  # Don't Remove
    """Return a Dropdown update listing the dataset folders.

    Every entry of ``<now_dir>/<datasets_root>`` whose name contains no dot
    is reported as ``<parent>/datasets/<entry>``, where ``<parent>`` comes
    from ``easy_infer.find_folder_parent(".", "pretrained")``. The ``name``
    argument is accepted but not used.
    """
    dataset_dir = os.path.join(now_dir, datasets_root)
    new_datasets = [
        os.path.join(
            easy_infer.find_folder_parent(".", "pretrained"), "datasets", entry
        )
        for entry in os.listdir(dataset_dir)
        if "." not in entry
    ]
    return gr.Dropdown.update(choices=new_datasets)
207
+
208
def get_indexes():
    """Collect every usable ``.index`` file found under ``index_root``.

    Files whose name contains ``"trained"`` are left out. Returns the list
    of paths, or an empty string when nothing was found.
    """
    found = []
    for dirpath, _dirnames, filenames in os.walk(index_root):
        for filename in filenames:
            if "trained" in filename or not filename.endswith(".index"):
                continue
            found.append(os.path.join(dirpath, filename))
    return found or ''
217
+
218
def get_fshift_presets():
    """Collect every ``.txt`` preset file found under ``fshift_root``.

    Returns the list of paths, or an empty string when nothing was found.
    """
    presets = []
    for dirpath, _dirnames, filenames in os.walk(fshift_root):
        for filename in filenames:
            if filename.endswith(".txt"):
                presets.append(os.path.join(dirpath, filename))
    return presets or ''
227
+
228
+ import soundfile as sf
229
+
230
def generate_output_path(output_folder, base_name, extension):
    """Return the first ``<base_name>_<n>.<extension>`` path not yet on disk.

    ``n`` starts at 1 and grows until ``os.path.exists`` reports the
    candidate inside ``output_folder`` as free.
    """
    counter = 1
    candidate = os.path.join(output_folder, f"{base_name}_{counter}.{extension}")
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(output_folder, f"{base_name}_{counter}.{extension}")
    return candidate
237
+
238
def change_choices():
    """Rescan the disk and return refreshed dropdown choices.

    Returns three update dicts, in order: model weights (``.pth``/``.onnx``
    under ``weight_root``), index files (``.index`` without ``"trained"`` in
    the name, under ``index_root``) and every entry of the ``audios``
    folder. Each list is sorted.
    """
    model_files = []
    for root, _dirs, files in os.walk(weight_root):
        for file in files:
            if file.endswith((".pth", ".onnx")):
                model_files.append(os.path.join(root, file))

    index_files = []
    for root, _dirs, files in os.walk(index_root, topdown=False):
        for name in files:
            if name.endswith(".index") and "trained" not in name:
                index_files.append(os.path.join(root, name))

    audio_dir = os.path.join(now_dir, "audios")
    audio_files = [os.path.join(audio_root, entry) for entry in os.listdir(audio_dir)]

    return (
        {"choices": sorted(model_files), "__type__": "update"},
        {"choices": sorted(index_files), "__type__": "update"},
        {"choices": sorted(audio_files), "__type__": "update"},
    )
251
def change_choices2():
    """Rescan the disk and return refreshed model and index choices.

    Returns two update dicts, in order: model weights (``.pth``/``.onnx``
    under ``weight_root``) and index files (``.index`` without ``"trained"``
    in the name, under ``index_root``). Each list is sorted.
    """
    model_files = []
    for root, _dirs, files in os.walk(weight_root):
        for file in files:
            if file.endswith((".pth", ".onnx")):
                model_files.append(os.path.join(root, file))

    index_files = []
    for root, _dirs, files in os.walk(index_root, topdown=False):
        for name in files:
            if name.endswith(".index") and "trained" not in name:
                index_files.append(os.path.join(root, name))

    return (
        {"choices": sorted(model_files), "__type__": "update"},
        {"choices": sorted(index_files), "__type__": "update"},
    )
263
def change_choices3():
    """Return refreshed choices for the two audio dropdowns.

    The first update lists every entry of ``<now_dir>/audio-others``, the
    second every entry of ``<now_dir>/audios``; both lists are sorted.
    """
    audio_dir = os.path.join(now_dir, "audios")
    main_audio = [os.path.join(audio_root, entry) for entry in os.listdir(audio_dir)]

    others_dir = os.path.join(now_dir, "audio-others")
    other_audio = [
        os.path.join(audio_others_root, entry) for entry in os.listdir(others_dir)
    ]

    return (
        {"choices": sorted(other_audio), "__type__": "update"},
        {"choices": sorted(main_audio), "__type__": "update"},
    )
273
+
274
def clean():
    """Return an update dict that resets a component's value to ``""``."""
    reset = dict(value="", __type__="update")
    return reset
276
+
277
def if_done(done, p):
    """Block until process ``p`` has exited, then set ``done[0]`` to True.

    ``p.poll()`` is checked every 0.5 seconds; a ``None`` result means the
    process is still running.
    """
    while p.poll() is None:
        sleep(0.5)
    done[0] = True
284
+
285
+
286
def if_done_multi(done, ps):
    """Block until every process in ``ps`` has exited, then set ``done[0]``.

    The processes are polled in order; as soon as one is still running
    (``poll()`` returns ``None``) the scan stops, sleeps 0.5 seconds and
    starts over.
    """
    # any() stops at the first running process, matching a scan-and-break loop.
    while any(proc.poll() is None for proc in ps):
        sleep(0.5)
    done[0] = True
297
+
298
def formant_enabled(
    cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button
):
    """Persist the formant on/off flag and return the matching UI updates.

    The flag, together with ``qfrency`` and ``tmbre``, is written to
    ``csvdb/formanting.csv``. The first returned update carries the checkbox
    value; the remaining ones toggle visibility. ``frmntapply``,
    ``formantpreset`` and ``formant_refresh_button`` are accepted but unused.

    NOTE(review): the enabled branch returns 6 updates while the disabled
    branch returns 7. That asymmetry is kept as-is here; confirm against the
    number of outputs wired to this callback.
    """
    enabled = bool(cbox)
    CSVutil("csvdb/formanting.csv", "w+", "formanting", enabled, qfrency, tmbre)

    visibility_count = 5 if enabled else 6
    updates = [{"value": enabled, "__type__": "update"}]
    for _ in range(visibility_count):
        updates.append({"visible": enabled, "__type__": "update"})
    return tuple(updates)
327
+
328
+
329
def formant_apply(qfrency, tmbre):
    """Persist the formant settings (flag forced on) and echo them back.

    Writes ``True``, ``qfrency`` and ``tmbre`` to ``csvdb/formanting.csv``
    and returns two update dicts carrying ``qfrency`` and ``tmbre``.
    """
    CSVutil("csvdb/formanting.csv", "w+", "formanting", True, qfrency, tmbre)
    return tuple(
        {"value": setting, "__type__": "update"} for setting in (qfrency, tmbre)
    )
339
+
340
def update_fshift_presets(preset, qfrency, tmbre):
    """Refresh the preset dropdown and, if a preset is selected, apply it.

    Args:
        preset: Path of the selected preset file, or a falsy value when no
            preset is selected.
        qfrency: Current quefrency value, used when no preset is selected.
        tmbre: Current timbre value, used when no preset is selected.

    Returns:
        Three update dicts: the refreshed preset choices, the quefrency
        value and the timbre value.

    Raises:
        OSError: If the preset file cannot be opened.
        IndexError: If the preset file has fewer than two lines.
    """
    if preset:
        with open(preset, 'r') as p:
            content = p.readlines()
        # First line is the quefrency, second line the timbre.
        qfrency, tmbre = content[0].strip(), content[1]

        formant_apply(qfrency, tmbre)
    # No preset selected: keep the incoming values untouched. The previous
    # code routed this case through preset_apply() and bound its two update
    # dicts to qfrency/tmbre, which nested update dicts inside the returned
    # "value" fields (and tried to open a file named "None" for a None preset).

    return (
        {"choices": get_fshift_presets(), "__type__": "update"},
        {"value": qfrency, "__type__": "update"},
        {"value": tmbre, "__type__": "update"},
    )
356
+
357
+ global log_interval
358
+
359
def set_log_interval(exp_dir, batch_size12):
    """Derive a log interval from the number of wavs in the experiment.

    Counts ``*.wav`` files in ``<exp_dir>/1_16k_wavs``. With no such folder
    or no wav files the interval is 1. Otherwise it is
    ``ceil(count / batch_size12)``, incremented by one when that result is
    greater than 1.
    """
    wav_dir = os.path.join(exp_dir, "1_16k_wavs")
    if not os.path.isdir(wav_dir):
        return 1

    wav_count = len(glob1(wav_dir, "*.wav"))
    if wav_count <= 0:
        return 1

    steps = math.ceil(wav_count / batch_size12)
    return steps + 1 if steps > 1 else steps
372
+
373
+ global PID, PROCESS
374
+
375
+ import re as regex
376
+ import scipy.io.wavfile as wavfile
377
+
378
+ cli_current_page = "HOME"
379
+
380
def cli_split_command(com):
    """Split a command line into arguments, honouring double quotes.

    A double-quoted run that is delimited by whitespace (or the string
    edges) becomes one argument with the quotes removed; any other run of
    non-whitespace characters becomes an argument as-is.
    """
    token_pattern = r'(?:(?<=\s)|^)"(.*?)"(?=\s|$)|(\S+)'
    # Each match is (quoted_text, bare_text); exactly one side is non-empty.
    return [quoted or bare for quoted, bare in regex.findall(token_pattern, com)]
385
+
386
+
387
def execute_generator_function(genObject):
    """Exhaust ``genObject``, discarding every item it produces."""
    iterator = iter(genObject)
    while True:
        try:
            next(iterator)
        except StopIteration:
            return
390
+
391
def preset_apply(preset, qfer, tmbr):
    """Load a formant preset file (if any) and return the resulting values.

    When ``str(preset)`` is non-empty the file is read: its first line
    (without the newline) becomes the quefrency and its second line the
    timbre, and both are passed to ``formant_apply``. With an empty preset
    the incoming values are returned unchanged. Returns two update dicts.
    """
    preset_path = str(preset)
    if preset_path:
        with open(preset_path, "r") as handle:
            lines = handle.readlines()
        qfer, tmbr = lines[0].split("\n")[0], lines[1]
        formant_apply(qfer, tmbr)

    return (
        {"value": qfer, "__type__": "update"},
        {"value": tmbr, "__type__": "update"},
    )
403
+
404
def change_page(page):
    """Record ``page`` in the module-level ``cli_current_page`` and return 0."""
    globals()["cli_current_page"] = page
    return 0
408
+
409
def switch_pitch_controls(f0method0):
    """Return visibility updates for the four min/max pitch inputs.

    The updates are ordered (min slider, min textbox, max slider, max
    textbox). Everything is hidden when the method is ``'rmvpe'``. Otherwise
    the textboxes are shown when ``rvc_globals.NotesOrHertz`` is truthy and
    the sliders are shown when it is falsy.
    """
    pitch_range_shown = f0method0 != 'rmvpe'

    if rvc_globals.NotesOrHertz:
        flags = (False, pitch_range_shown, False, pitch_range_shown)
    else:
        flags = (pitch_range_shown, False, pitch_range_shown, False)

    return tuple({"visible": flag, "__type__": "update"} for flag in flags)
426
+
427
def match_index(sid0):
    """Guess the index file that belongs to the selected voice model.

    The folder name is the part of ``sid0`` before the first ``.`` and then
    before the first ``_``. The first ``.index`` file listed in
    ``./logs/<folder>`` is used. If the module-level ``indexes_list`` knows
    that file under the lower-cased folder (and not under the exact folder
    first), the lower-cased folder is used to build the returned path.

    Args:
        sid0: Selected model name or path.

    Returns:
        ``(index_path, index_path)`` for the first ``.index`` file found, or
        ``("", "")`` when the folder is missing or holds no ``.index`` file.
    """
    folder = sid0.split(".")[0].split("_")[0]
    parent_dir = "./logs/" + folder
    if not os.path.exists(parent_dir):
        return ("", "")

    lowered_dir = "./logs/" + folder.lower()
    for filename in os.listdir(parent_dir.replace("\\", "/")):
        if not filename.endswith(".index"):
            continue

        regular_path = os.path.join("./logs/" + folder, filename).replace("\\", "/")
        lowered_path = os.path.join(lowered_dir, filename).replace("\\", "/")
        # Stop at the first known index that matches either spelling; only a
        # lower-cased match changes the directory used for the result.
        for known_index in indexes_list:
            if known_index == regular_path:
                break
            if known_index == lowered_path:
                parent_dir = lowered_dir
                break

        index_path = os.path.join(
            parent_dir.replace("\\", "/"), filename.replace("\\", "/")
        ).replace("\\", "/")
        return (index_path, index_path)

    # The folder exists but contains no .index file. Previously this fell
    # off the end and returned None, unlike the missing-folder case.
    return ("", "")
454
+
455
+ weights_dir = 'weights/'
456
+
457
def note_to_hz(note_name):
    """Convert a note name such as ``"A4"`` or ``"C#5"`` to a frequency in Hz.

    The last character is the octave digit and everything before it is the
    pitch class (sharps only). Tuning is equal temperament with A4 = 440 Hz.

    Raises:
        KeyError: If the pitch class is not one of the twelve known names.
        ValueError: If the last character is not a digit.
    """
    pitch_classes = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    # Semitone offsets relative to A within one octave: C = -9 ... B = +2.
    offsets_from_a = dict(zip(pitch_classes, range(-9, 3)))

    octave = int(note_name[-1])
    pitch_class = note_name[:-1]
    steps_from_a4 = 12 * (octave - 4) + offsets_from_a[pitch_class]

    semitone_ratio = 2.0 ** (1.0/12)
    return 440.0 * semitone_ratio ** steps_from_a4
464
+
465
def save_to_wav(record_button):
    """Move a recorded audio file into ``./audios`` under a timestamped name.

    ``record_button`` is the path of the recorded file. Returns the new file
    name (``YYYY-mm-dd_HH-MM-SS.wav``, without the folder), or ``None`` when
    nothing was recorded.
    """
    if record_button is None:
        return None

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    new_name = timestamp+'.wav'
    shutil.move(record_button, './audios/'+new_name)
    return new_name
474
+
475
def save_to_wav2_edited(dropbox):
    """Move an uploaded file into ``audios``, replacing any same-named file.

    ``dropbox`` is an object whose ``name`` attribute is the uploaded file's
    path; ``None`` is ignored. Always returns ``None``.
    """
    if dropbox is not None:
        source = dropbox.name
        destination = os.path.join('audios', os.path.basename(source))

        if os.path.exists(destination):
            os.remove(destination)
            print('Replacing old dropdown file...')

        shutil.move(source, destination)
    return
488
+
489
def save_to_wav2(dropbox):
    """Move an uploaded file into ``audios`` and return its new path.

    ``dropbox`` is an object whose ``name`` attribute is the uploaded file's
    path. A file of the same name already in ``audios`` is deleted first.
    """
    source = dropbox.name
    destination = os.path.join('audios', os.path.basename(source))

    already_there = os.path.exists(destination)
    if already_there:
        os.remove(destination)
        print('Replacing old dropdown file...')

    shutil.move(source, destination)
    return destination
499
+
500
+ from gtts import gTTS
501
+ import edge_tts
502
+ import asyncio
503
+
504
+
505
def change_choices_fix():
    """Rescan ``./audios`` and return a dropdown update for the audio list.

    Only files ending in a supported audio extension are listed (with
    forward slashes). The update's value is the file with the greatest
    ``os.path.getctime``, or ``""`` when the folder has no audio files. The
    list of paths found is also printed.
    """
    audio_suffixes = ('wav', 'mp3', 'flac', 'ogg', 'opus',
                      'm4a', 'mp4', 'aac', 'alac', 'wma',
                      'aiff', 'webm', 'ac3')
    audio_paths = [
        os.path.join('./audios', entry).replace('\\', '/')
        for entry in os.listdir("./audios")
        if entry.endswith(audio_suffixes)
    ]
    print(audio_paths)

    most_recent_audio = max(audio_paths, key=os.path.getctime) if audio_paths else ""
    return {"choices": sorted(audio_paths), "value": most_recent_audio, "__type__": "update"}
517
+
518
+
519
def custom_voice(
    _values,  # filter indices
    audio_files,  # all audio files
    model_voice_path='',
    transpose=0,
    f0method='pm',
    index_rate_=0.66,
    crepe_hop_length_=64.0,
    f0_autotune=False,
    file_index='',
    file_index2='',
):
    """Convert the selected audio files in place with the given voice model.

    The model is loaded through ``vc.get_vc``. For every item of
    ``_values`` the target file is ``audio_files[0]`` when the item is
    ``"converted_tts"``, otherwise ``"audio2/" + audio_files[item]``. Each
    file is converted with ``vc.vc_single_dont_save`` and the result is
    written back to the same path with ``sf.write``.

    NOTE(review): the source had lost its indentation; the write is assumed
    to happen once per converted file, inside the loop. Confirm.
    """
    vc.get_vc(model_voice_path)

    for _value_item in _values:
        if _value_item != "converted_tts":
            filename = "audio2/"+audio_files[_value_item]
        else:
            filename = audio_files[0]

        # Best-effort progress print; indexing with "converted_tts" fails and
        # is deliberately ignored (catch-all kept identical to a bare except).
        try:
            print(audio_files[_value_item], model_voice_path)
        except BaseException:
            pass

        conversion_options = dict(
            sid=0,
            input_audio_path0=filename,
            input_audio_path1=filename,
            f0_up_key=transpose,  # semitone shift, e.g. 12 / -12 between male and female
            f0_file=None,
            f0_method=f0method,
            file_index=file_index,
            file_index2=file_index2,
            index_rate=index_rate_,
            filter_radius=3,
            resample_sr=0,
            rms_mix_rate=0.25,
            protect=0.33,
            crepe_hop_length=crepe_hop_length_,
            f0_autotune=f0_autotune,
            f0_min=50,
            note_min=50,
            f0_max=1100,
            note_max=1100,
        )
        info_, (sample_, audio_output_) = vc.vc_single_dont_save(**conversion_options)

        sf.write(file=filename, samplerate=sample_, data=audio_output_)
568
+
569
def make_test(
    tts_text,
    tts_voice,
    model_path,
    index_path,
    transpose,
    f0_method,
    index_rate,
    crepe_hop_length,
    f0_autotune,
    tts_method
):
    """Synthesize ``tts_text`` to speech and convert it with a voice model.

    With ``tts_method == "Edge-tts"`` the text is synthesized to
    ``audio-outputs/converted_tts.wav`` (edge-tts first, gTTS as fallback),
    the raw speech is copied to ``audio-outputs/real_tts.wav`` and the
    first file is then converted in place through ``custom_voice``.

    Args:
        tts_text: Text to speak; cut to 60 characters when the ``DEMO``
            environment variable equals ``"SET_LIMIT"``.
        tts_voice: Voice identifier; its first two characters are used as
            the gTTS language and everything before the last ``-`` as the
            edge-tts voice name.
        model_path: Voice model passed to ``custom_voice``.
        index_path: Index file passed to ``custom_voice`` as ``file_index2``.
        transpose, f0_method, index_rate, crepe_hop_length, f0_autotune:
            Forwarded to ``custom_voice``.
        tts_method: Name of the TTS engine; only ``"Edge-tts"`` is handled.

    Returns:
        ``(converted_path, raw_tts_path)``, or ``None`` when ``tts_voice``
        is ``None`` or the TTS method is not handled.

    NOTE(review): the source had lost its indentation; the copy, conversion
    and return are assumed to belong to the "Edge-tts" branch. Confirm.
    """
    if tts_voice is None:
        return None

    filename = os.path.join(now_dir, "audio-outputs", "converted_tts.wav")
    if "SET_LIMIT" == os.getenv("DEMO"):
        if len(tts_text) > 60:
            tts_text = tts_text[:60]
            print("DEMO; limit to 60 characters")

    language = tts_voice[:2]
    if tts_method != "Edge-tts":
        return None

    try:
        edge_voice = "-".join(tts_voice.split('-')[:-1])
        asyncio.run(edge_tts.Communicate(tts_text, edge_voice).save(filename))
    except Exception:
        # edge-tts produced nothing: fall back to gTTS, and as a last
        # resort synthesize a placeholder so a file always exists.
        try:
            gTTS(tts_text, lang=language).save(filename)
            print(f'No audio was received. Please change the tts voice for {tts_voice}. USING gTTS.')
        except Exception:
            gTTS('a', lang=language).save(filename)
            print('Error: Audio will be replaced.')

    # Keep an untouched copy of the raw speech. shutil replaces the shell
    # "cp" call; a failed copy is reported instead of raising, as the shell
    # call never raised either.
    try:
        shutil.copyfile("audio-outputs/converted_tts.wav", "audio-outputs/real_tts.wav")
    except OSError as copy_error:
        print(f"Could not copy the raw TTS audio: {copy_error}")

    custom_voice(
        ["converted_tts"],  # filter indices
        ["audio-outputs/converted_tts.wav"],  # all audio files
        model_voice_path=model_path,
        transpose=transpose,
        f0method=f0_method,
        index_rate_=index_rate,
        crepe_hop_length_=crepe_hop_length,
        f0_autotune=f0_autotune,
        file_index='',
        file_index2=index_path,
    )
    return (
        os.path.join(now_dir, "audio-outputs", "converted_tts.wav"),
        os.path.join(now_dir, "audio-outputs", "real_tts.wav"),
    )
621
+
622
+ def_text = "อย่าลืมที่จะกดไลค์ และกดซับสะไค้ร์ช่องโออิสไมซี เพื่อไม่พลาดมีมใหม่ๆเวลาอัพโหลด"
623
+ def_index = "logs/DaengGuitar/added_IVF473_Flat_nprobe_1_daengguitar_v2.index"
624
+
625
+ def GradioSetup(UTheme=gr.themes.Soft()):
626
+
627
+ default_weight = names[0] if names else ''
628
+
629
+ with gr.Blocks(title="oItsMinez's RVC v2 WebUI", theme=gr.themes.Base(font=[gr.themes.GoogleFont("Noto Sans Thai"), "sans-serif"])) as app:
630
+ gr.Label('oItsMineZ\'s RVC v2 WebUI', show_label=False)
631
+ gr.Markdown(
632
+ "<div align='center'>\n\n"+
633
+ "RVC v2 Model"+
634
+ "[![oItsMineZ's RVC Model](https://img.shields.io/badge/%F0%9F%A4%97_Hugging_Face-_oItsMineZ's%20RVC%20%20Model-yellow?style=for-the-badge&logoColor=yellow)](https://huggingface.co/oItsMineZ/oItsMineZ-RVC-Model)\n\n"+
635
+ "ติดตาม oItsMineZ"+
636
+ "[![oItsMineZ on YouTube](https://img.shields.io/badge/YouTube-FF0000?style=for-the-badge&logo=youtube&logoColor=white)](https://www.youtube.com/@oItsMineZ?sub_confirmation=1)"+
637
+ "</div>"
638
+ )
639
+ with gr.Tabs():
640
+ with gr.TabItem("Info"):
641
+ gr.Markdown("## 📌แนะนำให้โคลน Space นี้ไว้ในบัญชีของคุณ เพื่อการใช้งานที่ดียิ่งขึ้น (ต้องสมัครบัญชี Hugging Face ก่อน)")
642
+ gr.Markdown("[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/oItsMineZ/RVC-v2-WebUI?duplicate=true)\n\n")
643
+
644
+ gr.HTML("<b><h2> 📄ข้อควรรู้ </h2></b>")
645
+ gr.Markdown("- RVC v2 (Retrieval Based Voice Conversion v2) เป็น AI Voice Model ที่ปรับปรุงมาจาก VITS ที่ทำให้เทรนโมเดลได้ง่ายขึ้น และคุณภาพของเสียงดีขึ้น")
646
+ gr.Markdown("- WebUI นี้ใช้สำหรับเฉพาะ **เสียง Vocal หรือ TTS** เท่านั้น! ถ้าอยากใช้ AI Cover เฉพาให้ใช้ [**ตัวนี้แทน**](https://huggingface.co/spaces/oItsMineZ/RVC-v2-AI-Cover-WebUI)")
647
+ gr.Markdown("- ถ้าอยากแยกเสียงร้องกับเสียงดนตรีออกจากเพลง [**(ให้แยกได้ที่นี่)**](https://huggingface.co/spaces/oItsMineZ/Ultimate-Vocal-Remover-WebUI) แล้วค่อยนำไฟล์ Vocal มาอัพโหลดในนี้")
648
+
649
+ gr.HTML("<b><h2> ✨ฟีเจอร์ </h2></b>")
650
+ gr.Markdown("- อัปโหลดไฟล์ Vocal หรือใช้ TTS (Text to Speech) แปลงข้อความเป็นเสียงได้เลย")
651
+ gr.Markdown("- สามารถดาวน์โหลด Model อื่นๆ ได้ที่แท็บ Resources [**(เว็บสำหรับหา Model เพิ่มเติม)**](https://voice-models.com)")
652
+ gr.Markdown("- ที่สำคัญ **อย่าลืม** *Refresh Model* ทุกครั้งเมื่อโหลด Model ใหม่เข้ามา")
653
+
654
+ gr.HTML("<b><h2> 📋รายชื่อ Model </h2></b>")
655
+ gr.Markdown("- อาจารย์แดง (DaengGuitar) - 500 Epochs")
656
+ gr.Markdown("- เต้ (TAEEXZENFIRE) - 500 Epochs")
657
+ gr.Markdown("- ท่านศาสดา - 50 Epochs")
658
+ gr.Markdown("- Model ใหม่เร็วๆ นี้ 🤫")
659
+
660
+ gr.HTML("<b><h2> 🌐WebUI อื่นๆ </h2></b>")
661
+ gr.Markdown("- AI Cover (เพลงที่มีทำนอง)")
662
+ gr.Markdown("[![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%8E%A4%EF%B8%8F_Space-_RVC%20v2%20AI%20Cover%20WebUI-red?style=for-the-badge)](https://huggingface.co/spaces/oItsMineZ/RVC-v2-AI-Cover-WebUI)")
663
+
664
+ gr.HTML("<b><h2> ❤️ขอขอบคุณ </h2></b>")
665
+ gr.Markdown("- [**@r3gm**](https://huggingface.co/r3gm) for [***Ultimate Vocal Remover WebUI***](https://huggingface.co/spaces/r3gm/Ultimate-Vocal-Remover-WebUI) and [***RVC Inference HF***](https://huggingface.co/spaces/r3gm/RVC_HFv2)")
666
+
667
+ with gr.TabItem("RVC Conversion"):
668
+ with gr.Row():
669
+ sid0 = gr.Dropdown(label="Inferencing voice:", choices=sorted(names), value=default_weight)
670
+ refresh_button = gr.Button("Refresh", variant="primary")
671
+ clean_button = gr.Button("Unload voice to save GPU memory", variant="primary")
672
+ clean_button.click(fn=lambda: ({"value": "", "__type__": "update"}), inputs=[], outputs=[sid0])
673
+
674
+ with gr.TabItem("Main Options"):
675
+ with gr.Row():
676
+ spk_item = gr.Slider(
677
+ minimum=0,
678
+ maximum=2333,
679
+ step=1,
680
+ label="Select Speaker/Singer ID:",
681
+ value=0,
682
+ visible=False,
683
+ interactive=True,
684
+ )
685
+
686
+ with gr.Group():
687
+ with gr.Row():
688
+ with gr.Column(): # First column for audio-related inputs
689
+ dropbox = gr.File(label="Drag your audio here:")
690
+ record_button=gr.Audio(source="microphone", label="Or record an audio:", type="filepath")
691
+ input_audio0 = gr.Textbox(
692
+ label="Manual path to the audio file to be processed",
693
+ value=os.path.join(now_dir, "audios", "someguy.mp3"),
694
+ visible=False
695
+ )
696
+ input_audio1 = gr.Dropdown(
697
+ label="Auto detect audio path and select from the dropdown:",
698
+ choices=sorted(audio_paths),
699
+ value='',
700
+ interactive=True,
701
+ )
702
+
703
+ input_audio1.select(fn=lambda:'',inputs=[],outputs=[input_audio0])
704
+ input_audio0.input(fn=lambda:'',inputs=[],outputs=[input_audio1])
705
+
706
+ dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0]).then(fn=change_choices_fix, inputs=[], outputs=[input_audio1])
707
+ record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0]).then(fn=change_choices_fix, inputs=[], outputs=[input_audio1])
708
+
709
+ best_match_index_path1 = match_index(sid0.value) # Get initial index from default sid0 (first voice model in list)
710
+
711
+ with gr.Column(): # Second column for pitch shift and other options
712
+ file_index2 = gr.Dropdown(
713
+ label="Auto-detect index path and select from the dropdown (**เลือกให้ตรงกับ Model ที่เลือกไว้**):",
714
+ choices=get_indexes(),
715
+ value=def_index,
716
+ interactive=True,
717
+ allow_custom_value=True,
718
+ )
719
+ index_rate1 = gr.Slider(
720
+ minimum=0,
721
+ maximum=1,
722
+ label="Search feature ratio:",
723
+ value=0.75,
724
+ interactive=True,
725
+ )
726
+ refresh_button.click(
727
+ fn=change_choices, inputs=[], outputs=[sid0, file_index2, input_audio1]
728
+ )
729
+ with gr.Column():
730
+ vc_transform0 = gr.Number(
731
+ label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):", value=0
732
+ )
733
+
734
+ # Create a checkbox for advanced settings
735
+ advanced_settings_checkbox = gr.Checkbox(
736
+ value=False,
737
+ label="Advanced Settings",
738
+ interactive=True,
739
+ )
740
+
741
+ # Advanced settings container
742
+ with gr.Column(visible=False) as advanced_settings: # Initially hidden
743
+ with gr.Row(label = "Advanced Settings", open = False):
744
+ with gr.Column():
745
+ f0method0 = gr.Radio(
746
+ label="Select the pitch extraction algorithm:",
747
+ choices=["pm", "harvest", "dio", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe", "rmvpe+"],
748
+ value="rmvpe+",
749
+ interactive=True,
750
+ )
751
+ f0_autotune = gr.Checkbox(
752
+ label="Enable autotune",
753
+ interactive=True
754
+ )
755
+ crepe_hop_length = gr.Slider(
756
+ minimum=1,
757
+ maximum=512,
758
+ step=1,
759
+ label="Mangio-Crepe Hop Length (Only applies to mangio-crepe): Hop length refers to the time it takes for the speaker to jump to a dramatic pitch. Lower hop lengths take more time to infer but are more pitch accurate.",
760
+ value=120,
761
+ interactive=True,
762
+ visible=False,
763
+ )
764
+ filter_radius0 = gr.Slider(
765
+ minimum=0,
766
+ maximum=7,
767
+ label="If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.",
768
+ value=3,
769
+ step=1,
770
+ interactive=True,
771
+ )
772
+
773
+ minpitch_slider = gr.Slider(
774
+ label = "Min pitch:",
775
+ info = "Specify minimal pitch for inference [HZ]",
776
+ step = 0.1,
777
+ minimum = 1,
778
+ scale = 0,
779
+ value = 50,
780
+ maximum = 16000,
781
+ interactive = True,
782
+ visible = (not rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
783
+ )
784
+ minpitch_txtbox = gr.Textbox(
785
+ label = "Min pitch:",
786
+ info = "Specify minimal pitch for inference [NOTE][OCTAVE]",
787
+ placeholder = "C5",
788
+ visible = (rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
789
+ interactive = True,
790
+ )
791
+
792
+ maxpitch_slider = gr.Slider(
793
+ label = "Max pitch:",
794
+ info = "Specify max pitch for inference [HZ]",
795
+ step = 0.1,
796
+ minimum = 1,
797
+ scale = 0,
798
+ value = 1100,
799
+ maximum = 16000,
800
+ interactive = True,
801
+ visible = (not rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
802
+ )
803
+ maxpitch_txtbox = gr.Textbox(
804
+ label = "Max pitch:",
805
+ info = "Specify max pitch for inference [NOTE][OCTAVE]",
806
+ placeholder = "C6",
807
+ visible = (rvc_globals.NotesOrHertz) and (f0method0.value != 'rmvpe'),
808
+ interactive = True,
809
+ )
810
+
811
+ with gr.Column():
812
+ file_index1 = gr.Textbox(
813
+ label="Feature search database file path:",
814
+ value="",
815
+ interactive=True,
816
+ )
817
+
818
+ with gr.Accordion(label = "Custom f0 [Root pitch] File", open = False):
819
+ f0_file = gr.File(label="F0 curve file (optional). One pitch per line. Replaces the default F0 and pitch modulation:")
820
+
821
+ f0method0.change(
822
+ fn=lambda radio: (
823
+ {
824
+ "visible": radio in ['mangio-crepe', 'mangio-crepe-tiny'],
825
+ "__type__": "update"
826
+ }
827
+ ),
828
+ inputs=[f0method0],
829
+ outputs=[crepe_hop_length]
830
+ )
831
+
832
+ f0method0.change(
833
+ fn=switch_pitch_controls,
834
+ inputs=[f0method0],
835
+ outputs=[minpitch_slider, minpitch_txtbox,
836
+ maxpitch_slider, maxpitch_txtbox]
837
+ )
838
+
839
+ with gr.Column():
840
+ resample_sr0 = gr.Slider(
841
+ minimum=0,
842
+ maximum=48000,
843
+ label="Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling:",
844
+ value=0,
845
+ step=1,
846
+ interactive=True,
847
+ )
848
+ rms_mix_rate0 = gr.Slider(
849
+ minimum=0,
850
+ maximum=1,
851
+ label="Use the volume envelope of the input to replace or mix with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is used:",
852
+ value=0.25,
853
+ interactive=True,
854
+ )
855
+ protect0 = gr.Slider(
856
+ minimum=0,
857
+ maximum=0.5,
858
+ label="Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy:",
859
+ value=0.33,
860
+ step=0.01,
861
+ interactive=True,
862
+ )
863
+ formanting = gr.Checkbox(
864
+ value=bool(DoFormant),
865
+ label="Formant shift inference audio",
866
+ info="Used for male to female and vice-versa conversions",
867
+ interactive=True,
868
+ visible=True,
869
+ )
870
+
871
+ formant_preset = gr.Dropdown(
872
+ value='',
873
+ choices=get_fshift_presets(),
874
+ label="Browse presets for formanting",
875
+ info="Presets are located in formantshiftcfg/ folder",
876
+ visible=bool(DoFormant),
877
+ )
878
+
879
+ formant_refresh_button = gr.Button(
880
+ value='\U0001f504',
881
+ visible=bool(DoFormant),
882
+ variant='primary',
883
+ )
884
+
885
+ qfrency = gr.Slider(
886
+ value=Quefrency,
887
+ info="Default value is 1.0",
888
+ label="Quefrency for formant shifting",
889
+ minimum=0.0,
890
+ maximum=16.0,
891
+ step=0.1,
892
+ visible=bool(DoFormant),
893
+ interactive=True,
894
+ )
895
+
896
+ tmbre = gr.Slider(
897
+ value=Timbre,
898
+ info="Default value is 1.0",
899
+ label="Timbre for formant shifting",
900
+ minimum=0.0,
901
+ maximum=16.0,
902
+ step=0.1,
903
+ visible=bool(DoFormant),
904
+ interactive=True,
905
+ )
906
+ frmntbut = gr.Button(
907
+ "Apply", variant="primary", visible=bool(DoFormant)
908
+ )
909
+
910
+ formant_preset.change(
911
+ fn=preset_apply,
912
+ inputs=[formant_preset, qfrency, tmbre],
913
+ outputs=[qfrency, tmbre],
914
+ )
915
+ formanting.change(
916
+ fn=formant_enabled,
917
+ inputs=[
918
+ formanting,
919
+ qfrency,
920
+ tmbre,
921
+ frmntbut,
922
+ formant_preset,
923
+ formant_refresh_button,
924
+ ],
925
+ outputs=[
926
+ formanting,
927
+ qfrency,
928
+ tmbre,
929
+ frmntbut,
930
+ formant_preset,
931
+ formant_refresh_button,
932
+ ],
933
+ )
934
+ frmntbut.click(
935
+ fn=formant_apply,
936
+ inputs=[qfrency, tmbre],
937
+ outputs=[qfrency, tmbre],
938
+ )
939
+ formant_refresh_button.click(
940
+ fn=update_fshift_presets,
941
+ inputs=[formant_preset, qfrency, tmbre],
942
+ outputs=[formant_preset, qfrency, tmbre],
943
+ )
944
+
945
+ # Function to toggle advanced settings
946
def toggle_advanced_settings(checkbox):
    """Build the update payload that shows or hides the advanced-settings container.

    Args:
        checkbox: Current value of the "Advanced Settings" checkbox; it is
            forwarded unchanged as the new ``visible`` flag.

    Returns:
        A dict with the keys ``"visible"`` and ``"__type__"`` (always
        ``"update"``), in that order.
    """
    return dict(visible=checkbox, __type__="update")
948
+
949
+ # Attach the change event
950
+ advanced_settings_checkbox.change(
951
+ fn=toggle_advanced_settings,
952
+ inputs=[advanced_settings_checkbox],
953
+ outputs=[advanced_settings]
954
+ )
955
+
956
+ but0 = gr.Button("Convert", variant="primary").style(full_width=True)
957
+
958
+ with gr.Row(): # Defines output info + output audio download after conversion
959
+ vc_output1 = gr.Textbox(label="Output information:")
960
+ vc_output2 = gr.Audio(label="Export audio (click on the three dots in the lower right corner to download)")
961
+
962
+ with gr.Group(): # I think this defines the big convert button
963
+ with gr.Row():
964
+ but0.click(
965
+ vc.vc_single,
966
+ [
967
+ spk_item,
968
+ input_audio0,
969
+ input_audio1,
970
+ vc_transform0,
971
+ f0_file,
972
+ f0method0,
973
+ file_index1,
974
+ file_index2,
975
+ index_rate1,
976
+ filter_radius0,
977
+ resample_sr0,
978
+ rms_mix_rate0,
979
+ protect0,
980
+ crepe_hop_length,
981
+ minpitch_slider, minpitch_txtbox,
982
+ maxpitch_slider, maxpitch_txtbox,
983
+ f0_autotune
984
+ ],
985
+ [vc_output1, vc_output2],
986
+ )
987
+
988
+
989
+ with gr.Group(visible=False): # Markdown explanation of batch inference
990
+ with gr.Row(visible=False):
991
+ with gr.Column(visible=False):
992
+ vc_transform1 = gr.Number(
993
+ label="Transpose (integer, number of semitones, raise by an octave: 12, lower by an octave: -12):", value=0, visible=False
994
+ )
995
# Batch output folder field. The closing quote used to sit after `visible=False`,
# which made the default folder the literal text "opt, visible=False" and left the
# visibility flag unset; the default is "opt" and the field is hidden like the
# other batch-inference controls around it.
opt_input = gr.Textbox(label="Specify output folder:", value="opt", visible=False)
996
+ with gr.Column(visible=False):
997
+ file_index4 = gr.Dropdown(
998
+ label="Auto-detect index path and select from the dropdown:",
999
+ choices=get_indexes(),
1000
+ value=def_index,
1001
+ interactive=True,
1002
+ )
1003
+ sid0.select(fn=match_index, inputs=[sid0], outputs=[file_index2, file_index4])
1004
+
1005
+ refresh_button.click(
1006
+ fn=lambda: change_choices()[1],
1007
+ inputs=[],
1008
+ outputs=file_index4,
1009
+ )
1010
+ index_rate2 = gr.Slider(
1011
+ minimum=0,
1012
+ maximum=1,
1013
+ label="Search feature ratio:",
1014
+ value=0.75,
1015
+ interactive=True,
1016
+ )
1017
+ with gr.Row(visible=False):
1018
+ dir_input = gr.Textbox(
1019
+ label="Enter the path of the audio folder to be processed (copy it from the address bar of the file manager):",
1020
+ value=os.path.join(now_dir, "audios"),
1021
+ )
1022
+ inputs = gr.File(
1023
+ file_count="multiple", label="You can also input audio files in batches. Choose one of the two options. Priority is given to reading from the folder.", visible=False
1024
+ )
1025
+
1026
+ with gr.Row(visible=False):
1027
+ with gr.Column(visible=False):
1028
+ # Create a checkbox for advanced batch settings
1029
+ advanced_settings_batch_checkbox = gr.Checkbox(
1030
+ value=False,
1031
+ label="Advanced Settings",
1032
+ interactive=True,
1033
+ )
1034
+
1035
+ # Advanced batch settings container
1036
+ with gr.Row(visible=False) as advanced_settings_batch: # Initially hidden
1037
+ with gr.Row(label = "Advanced Settings", open = False):
1038
+ with gr.Column():
1039
+ file_index3 = gr.Textbox(
1040
+ label="Feature search database file path:",
1041
+ value="",
1042
+ interactive=True,
1043
+ )
1044
+
1045
+ f0method1 = gr.Radio(
1046
+ label="Select the pitch extraction algorithm:",
1047
+ choices=["pm", "harvest", "crepe", "rmvpe"],
1048
+ value="rmvpe",
1049
+ interactive=True,
1050
+ )
1051
+ f0_autotune = gr.Checkbox(
1052
+ label="Enable autotune",
1053
+ interactive=True,
1054
+ )
1055
+ filter_radius1 = gr.Slider(
1056
+ minimum=0,
1057
+ maximum=7,
1058
+ label="If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.",
1059
+ value=3,
1060
+ step=1,
1061
+ interactive=True,
1062
+ )
1063
+
1064
+ with gr.Row(visible=False):
1065
+ format1 = gr.Radio(
1066
+ label="Export file format",
1067
+ choices=["wav", "flac", "mp3", "m4a"],
1068
+ value="wav",
1069
+ interactive=True,
1070
+ )
1071
+
1072
+ with gr.Column(visible=False):
1073
+ resample_sr1 = gr.Slider(
1074
+ minimum=0,
1075
+ maximum=48000,
1076
+ label="Resample the output audio in post-processing to the final sample rate. Set to 0 for no resampling:",
1077
+ value=0,
1078
+ step=1,
1079
+ interactive=True,
1080
+ )
1081
+ rms_mix_rate1 = gr.Slider(
1082
+ minimum=0,
1083
+ maximum=1,
1084
+ label="Use the volume envelope of the input to replace or mix with the volume envelope of the output. The closer the ratio is to 1, the more the output envelope is used:",
1085
+ value=1,
1086
+ interactive=True,
1087
+ )
1088
+ protect1 = gr.Slider(
1089
+ minimum=0,
1090
+ maximum=0.5,
1091
+ label="Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music. Set to 0.5 to disable. Decrease the value to increase protection, but it may reduce indexing accuracy:",
1092
+ value=0.33,
1093
+ step=0.01,
1094
+ interactive=True,
1095
+ )
1096
+ vc_output3 = gr.Textbox(label="Output information:", visible=False)
1097
+ but1 = gr.Button("Convert", variant="primary", visible=False)
1098
+ but1.click(
1099
+ vc.vc_multi,
1100
+ [
1101
+ spk_item,
1102
+ dir_input,
1103
+ opt_input,
1104
+ inputs,
1105
+ vc_transform1,
1106
+ f0method1,
1107
+ file_index3,
1108
+ file_index4,
1109
+ index_rate2,
1110
+ filter_radius1,
1111
+ resample_sr1,
1112
+ rms_mix_rate1,
1113
+ protect1,
1114
+ format1,
1115
+ crepe_hop_length,
1116
+ minpitch_slider if (not rvc_globals.NotesOrHertz) else minpitch_txtbox,
1117
+ maxpitch_slider if (not rvc_globals.NotesOrHertz) else maxpitch_txtbox,
1118
+ f0_autotune
1119
+ ],
1120
+ [vc_output3],
1121
+ )
1122
+
1123
+ sid0.change(
1124
+ fn=vc.get_vc,
1125
+ inputs=[sid0, protect0, protect1],
1126
+ outputs=[spk_item, protect0, protect1],
1127
+ )
1128
+ if not sid0.value == '':
1129
+ spk_item, protect0, protect1 = vc.get_vc(sid0.value, protect0, protect1)
1130
+
1131
+ # Function to toggle advanced settings
1132
def toggle_advanced_settings_batch(checkbox):
    """Build the update payload that shows or hides the batch advanced-settings row.

    Args:
        checkbox: Current value of the batch "Advanced Settings" checkbox; it
            is forwarded unchanged as the new ``visible`` flag.

    Returns:
        A dict with the keys ``"visible"`` and ``"__type__"`` (always
        ``"update"``), in that order.
    """
    return dict(visible=checkbox, __type__="update")
1134
+
1135
+ # Attach the change event
1136
+ advanced_settings_batch_checkbox.change(
1137
+ fn=toggle_advanced_settings_batch,
1138
+ inputs=[advanced_settings_batch_checkbox],
1139
+ outputs=[advanced_settings_batch]
1140
+ )
1141
+
1142
+ with gr.Accordion(label="f0method8", visible=False): #Don't Remove
1143
+ with gr.Row():
1144
+ with gr.Column():
1145
+ f0method8 = gr.Radio(
1146
+ choices=["pm", "harvest", "dio", "crepe", "mangio-crepe", "rmvpe", "rmvpe_gpu"],
1147
+ value="rmvpe",
1148
+ interactive=True,
1149
+ )
1150
+
1151
+ with gr.TabItem("TTS"):
1152
+ with gr.Group():
1153
+ with gr.Column():
1154
+ text_test = gr.Textbox(label="Text:", placeholder="Enter the text you want to convert to voice...", value=def_text, lines=6)
1155
+
1156
+ with gr.Group():
1157
+
1158
+ with gr.Column():
1159
+ model_voice_path07 = gr.Dropdown(label='RVC Model:', choices=sorted(names), value=default_weight)
1160
+ best_match_index_path1 = match_index(model_voice_path07.value)
1161
+
1162
+ file_index2_07 = gr.Dropdown(
1163
+ label='Select the .index file (**เลือกให้ตรงกับ Model ที่เลือกไว้**):',
1164
+ choices=get_indexes(),
1165
+ value=def_index,
1166
+ interactive=True,
1167
+ allow_custom_value=True,
1168
+ )
1169
+
1170
+ with gr.Row():
1171
+ with gr.Column():
1172
+ tts_methods_voice = ["Edge-tts"]
1173
+ ttsmethod_test = gr.Dropdown(tts_methods_voice, value='Edge-tts', label = 'TTS Method:', visible=False)
1174
+ tts_test = gr.Dropdown(set_edge_voice, label = 'TTS Model:', value='th-TH-NiwatNeural-Male', visible=True)
1175
+ ttsmethod_test.change(
1176
+ fn=update_tts_methods_voice,
1177
+ inputs=ttsmethod_test,
1178
+ outputs=tts_test,
1179
+ )
1180
+
1181
+ with gr.Row():
1182
+ refresh_button_ = gr.Button("Refresh", variant="primary")
1183
+ refresh_button_.click(fn=change_choices2, inputs=[], outputs=[model_voice_path07, file_index2_07])
1184
+ with gr.Row():
1185
+ original_ttsvoice = gr.Audio(label='Audio TTS:')
1186
+ ttsvoice = gr.Audio(label='Audio RVC:')
1187
+
1188
+ with gr.Row():
1189
+ button_test = gr.Button("Convert", variant="primary")
1190
+
1191
+ button_test.click(make_test, inputs=[
1192
+ text_test,
1193
+ tts_test,
1194
+ model_voice_path07,
1195
+ file_index2_07,
1196
+ vc_transform0,
1197
+ f0method8,
1198
+ index_rate1,
1199
+ crepe_hop_length,
1200
+ f0_autotune,
1201
+ ttsmethod_test
1202
+ ], outputs=[ttsvoice, original_ttsvoice])
1203
+
1204
+ with gr.TabItem("Resources"):
1205
+ gr.Markdown(f"Limit Download Size is {os.getenv('MAX_DOWNLOAD_SIZE')} MB, duplicate the space for modify the limit")
1206
+ easy_infer.download_model()
1207
+ easy_infer.download_audio()
1208
+ # https://huggingface.co/oItsMineZ/oItsMineZ-RVC-Model/resolve/main/DaengGuitar/DaengGuitar.zip
1209
+
1210
+ with gr.TabItem("Settings"):
1211
+ with gr.Row():
1212
+ gr.Markdown(value="Pitch settings")
1213
+ noteshertz = gr.Checkbox(
1214
+ label = "Whether to use note names instead of their hertz value. E.G. [C5, D6] instead of [523.25, 1174.66]Hz",
1215
+ value = rvc_globals.NotesOrHertz,
1216
+ interactive = True,
1217
+ )
1218
+
1219
+ noteshertz.change(fn=lambda nhertz: rvc_globals.__setattr__('NotesOrHertz', nhertz), inputs=[noteshertz], outputs=[])
1220
+
1221
+ noteshertz.change(
1222
+ fn=switch_pitch_controls,
1223
+ inputs=[f0method0],
1224
+ outputs=[
1225
+ minpitch_slider, minpitch_txtbox,
1226
+ maxpitch_slider, maxpitch_txtbox,]
1227
+ )
1228
+ return app
1229
+
1230
def GradioRun(app):
    """Enable queueing on ``app`` and launch it.

    The previous version branched on ``config.iscolab or config.paperspace``,
    but both branches executed the exact same ``queue(...).launch()`` call and
    the computed ``share_gradio_link`` flag was never passed anywhere, so the
    branch and the unused local have been removed.

    Args:
        app: Object exposing ``queue(concurrency_count=..., max_size=...)``
            whose result exposes ``launch()`` (the app built by
            ``GradioSetup``).

    Returns:
        None. The result of ``launch()`` is discarded, as before.
    """
    concurrency_count = 511
    max_size = 1022

    queued_app = app.queue(concurrency_count=concurrency_count, max_size=max_size)
    queued_app.launch()
1241
+
1242
if __name__ == "__main__":
    # Script entry point: build the UI with the theme taken from the runtime
    # config, then hand the resulting app to the launcher.
    app = GradioSetup(
        UTheme=config.grtheme,
    )
    GradioRun(app)