# Page header captured with this file: "Spaces: Build error".
import os
import time

import gradio as gr
import soundfile
import torch

import infer_tool
# Single-element list; presumably a mutable conversion counter. It is not
# referenced anywhere else in the visible code.
convert_cnt = [0]

# Prefer the GPU when torch reports one, otherwise fall back to the CPU.
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Checkpoint and config loaded at start-up.
# An earlier checkpoint, kept for reference: "83_epochs.pth"
model_name = "mg_1324_epochs_v0.0.8.pth"
config_name = "milky_green.json"

net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(
    model_name,
    f"configs/{config_name}",
)

# Read parameters from the loaded config.
target_sample = hps_ms.data.sampling_rate

# Speaker display name -> speaker id handed to infer_tool.infer.
spk_dict = {
    "明前奶绿": 0,
    "云灏": 2,
    "即霜": 3,
    "奕兰秋": 4,
}
def list_models():
    """Return the ``.pth`` checkpoints found in the current working directory.

    Files whose name starts with ``D_`` are skipped. The result is sorted so
    that the listing, and the default picked below, do not depend on the
    arbitrary order in which ``os.listdir`` reports entries.

    Side effect: the module-level ``model_name`` is replaced by each listed
    checkpoint whose name is at least as long as the current ``model_name``,
    so it ends up naming the last such file in sorted order. This function
    only changes the name; it does not load anything.

    Returns:
        list[str]: Sorted checkpoint file names (names only, no directory).
    """
    global model_name
    checkpoints = sorted(
        entry
        for entry in os.listdir(os.getcwd())
        if entry.endswith(".pth") and not entry.startswith("D_")
    )
    for entry in checkpoints:
        # ">=" (not ">") is deliberate: on equal length the later file wins.
        if len(entry) >= len(model_name):
            model_name = entry
    return checkpoints
def vc_fn(sid, audio_record, audio_upload, tran):
    """Convert an uploaded or recorded clip to the selected speaker's voice.

    Args:
        sid: Speaker name; looked up in ``spk_dict``.
        audio_record: File path of the microphone recording, or ``None``.
        audio_upload: File path of the uploaded audio, or ``None``. When both
            inputs are present the upload takes precedence.
        tran: Pitch shift forwarded unchanged to ``infer_tool`` (the UI
            labels it as a number of semitones).

    Returns:
        A 3-tuple ``(message, audio, image)``, one value per output component
        this handler is wired to. On rejection (no input, or a clip longer
        than 600 seconds) ``audio`` and ``image`` are ``None``. On success
        ``audio`` is ``(target_sample, samples)`` and ``image`` is an update
        pointing at ``temp.jpg``.
    """
    print(sid, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

    audio_path = audio_upload if audio_upload is not None else audio_record
    if audio_path is None:
        # Three values are required here: returning fewer than the number of
        # wired outputs makes Gradio raise instead of showing the message.
        return "你需要上传wav文件或使用网页内置的录音!", None, None

    audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
    duration = audio.shape[0] / sampling_rate
    if duration > 600:
        return "请上传小于600s的音频,需要转换长音频请使用colab", None, None

    # NOTE(review): the sample rate returned by infer() is not used; output is
    # written and returned at target_sample -- confirm the two always agree.
    o_audio, _out_sr = infer_tool.infer(
        audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input
    )

    # NOTE(review): fixed output path is shared by every request -- confirm
    # the app is never used concurrently.
    out_path = "./out_temp.wav"
    soundfile.write(out_path, o_audio, target_sample)

    # Presumably f0_plt renders the pitch-curve image to temp.jpg, which is
    # returned below -- verify against infer_tool.
    infer_tool.f0_plt(audio_path, out_path, tran, hubert_soft, feature_input)
    mistake, var = infer_tool.calc_error(audio_path, out_path, tran, feature_input)

    return (
        f"半音偏差:{mistake}\n半音方差:{var}",
        (target_sample, o_audio),
        gr.Image.update("temp.jpg"),
    )
def change_model(model):
    """Load checkpoint ``model`` and make it the active model.

    The checkpoint is loaded first and the module-level state is replaced
    only afterwards, so a failed load leaves ``model_name`` and the loaded
    objects describing the same, previously active model.

    Args:
        model: Checkpoint file name, as selected in the model dropdown.

    Returns:
        str: Status text naming the model that was loaded.
    """
    global model_name, net_g_ms, hubert_soft, feature_input, hps_ms, target_sample

    loaded = infer_tool.load_model(model, f"configs/{config_name}")

    net_g_ms, hubert_soft, feature_input, hps_ms = loaded
    model_name = model
    # Keep the derived sampling rate in step with the reloaded config.
    target_sample = hps_ms.data.sampling_rate
    return "载入模型:" + model_name
# Scan for checkpoints before building the UI. list_models() may also change
# model_name, which is used as the dropdown's initial value below.
available_models = list_models()

with gr.Blocks() as app:
    with gr.Tabs():
        # --- Tab 1: conversion controls and results ---
        with gr.TabItem("Basic"):
            gr.Markdown(value="""
本模型为 [sovits_f0](https://huggingface.co/spaces/innnky/nyaru-svc2.0-advanced) 魔改。含AI奶绿(aka. [明前奶绿](https://space.bilibili.com/2132180406))音色,支持**60s以内**的**无伴奏**wav、mp3(单声道)格式,或使用**网页内置**的录音(二选一)
转换效果取决于源音频语气、节奏是否与目标音色相近,以及音域是否超出目标音色音域范围
奶绿高音数据效果稍差,一些音高过高的需要考虑降调
该模型基于 [innnky/so-vits-svc](https://github.com/innnky/so-vits-svc),如果想自己制作并训练模型可以访问这个 [GitHub 仓库](https://github.com/IceKyrin/sovits_guide)
""")

            # Model picker: choosing an entry reloads the checkpoint and
            # shows the status text returned by change_model.
            model_selected = gr.Dropdown(
                choices=available_models,
                label="模型",
                value=model_name,
            )
            current_model_text = gr.Markdown("")
            model_selected.change(
                fn=change_model,
                inputs=[model_selected],
                outputs=[current_model_text],
            )

            # Conversion inputs: speaker, two alternative audio sources, pitch shift.
            speaker_id = gr.Dropdown(
                label="音色",
                choices=['明前奶绿'],
                value="明前奶绿",
            )
            record_input = gr.Audio(
                source="microphone",
                label="录制你的声音",
                type="filepath",
                elem_id="audio_inputs",
            )
            upload_input = gr.Audio(
                source="upload",
                label="上传音频(长度小于60秒)",
                type="filepath",
                elem_id="audio_inputs",
            )
            vc_transform = gr.Number(
                label="升降半音(整数,可以正负,半音数量,升高八度就是12)",
                value=0,
            )
            vc_submit = gr.Button("转换", variant="primary")

            # Conversion outputs: audio, pitch-error text, f0 plot.
            out_audio = gr.Audio(label="Output Audio")
            gr.Markdown(value="""
输出信息为音高平均偏差半音数量,体现转换音频的跑调情况(一般平均小于0.5个半音)
""")
            out_message = gr.Textbox(label="跑调误差信息")
            gr.Markdown(value="""f0曲线可以直观的显示跑调情况,蓝色为输入音高,橙色为合成音频的音高
若**只看见橙色**,说明蓝色曲线被覆盖,转换效果较好
""")
            f0_image = gr.Image(label="f0曲线")

            # The outputs order must match the tuple returned by vc_fn.
            vc_submit.click(
                fn=vc_fn,
                inputs=[speaker_id, record_input, upload_input, vc_transform],
                outputs=[out_message, out_audio, f0_image],
            )

        # --- Tab 2: usage instructions ---
        with gr.TabItem("使用说明"):
            gr.Markdown(value="""
0、合集:https://github.com/IceKyrin/sovits_guide/blob/main/README.md
1、仅支持sovit_f0(sovits2.0)模型
2、自行下载hubert-soft-0d54a1f4.pt改名为hubert.pt放置于pth文件夹下(已经下好了)
https://github.com/bshall/hubert/releases/tag/v0.1
3、pth文件夹下放置sovits2.0的模型
4、与模型配套的xxx.json,需有speaker项——人物列表
5、放无伴奏的音频、或网页内置录音,不要放奇奇怪怪的格式
6、仅供交流使用,不对用户行为负责
""")

# Start the web server once the layout is fully defined.
app.launch()