Spaces:
Sleeping
Sleeping
File size: 1,999 Bytes
597103f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import io
import os
import gradio as gr
import librosa
import numpy as np
import soundfile
import torch
from inference.infer_tool import Svc
import logging
# Silence numba's verbose JIT-compilation debug logging.
logging.getLogger("numba").setLevel(logging.WARNING)
# Checkpoint and config for the so-vits-svc "aqua" voice model.
model_path = "logs/48k/aqua.pth"
config_path = "configs/config.json"
# Loaded once at import time; shared by every vc_fn invocation.
svc_model = Svc(model_path, config_path)
def vc_fn(input_audio, vc_transform, term):
    """Convert an uploaded voice clip with the "aqua" so-vits-svc model.

    Args:
        input_audio: Gradio audio tuple ``(sampling_rate, np.ndarray)``, or
            ``None`` when nothing was uploaded.
        vc_transform: pitch transposition (number, semitones) passed to the model.
        term: bool — whether the user accepted the usage agreement.

    Returns:
        ``(message, audio)`` where ``audio`` is ``(48000, np.ndarray)`` on
        success and ``None`` for any rejected input.
    """
    if not term:
        return "请阅读并同意《AI阿夸模型使用协议》", None
    if input_audio is None:
        return "请上传音频", None
    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    if duration > 30:
        return "请上传小于30s的音频,长音频的转换请在本地进行", None
    # Gradio may deliver int16/int32 PCM or already-normalized float samples;
    # np.iinfo() raises ValueError on float dtypes, so only rescale integers.
    if np.issubdtype(audio.dtype, np.integer):
        audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    else:
        audio = audio.astype(np.float32)
    if len(audio.shape) > 1:
        # (frames, channels) -> mono; librosa.to_mono wants (channels, frames).
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 24000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=24000)
    print(audio.shape)
    # Round-trip through an in-memory WAV because Svc.infer reads a file-like.
    out_wav_path = io.BytesIO()
    soundfile.write(out_wav_path, audio, 24000, format="wav")
    out_wav_path.seek(0)  # rewind so the model reads from the start
    sid = "aqua"  # single fixed speaker for this Space
    out_audio, out_sr = svc_model.infer(sid, vc_transform, out_wav_path)
    _audio = out_audio.cpu().numpy()
    # NOTE(review): input is resampled to 24 kHz but the result is labeled
    # 48 kHz — presumably Svc.infer upsamples; confirm against the model config.
    return "Success", (48000, _audio)
# UI inputs: audio upload, pitch transposition (semitones), terms checkbox.
# NOTE(review): gr.inputs / gr.outputs, source=, default=, layout= and theme=
# are the legacy pre-3.x Gradio API — behavior depends on the pinned version.
inputs = [
    gr.inputs.Audio(source="upload"),
    gr.inputs.Number(default=0),
    gr.Checkbox(label="您已阅读并同意《AI阿夸模型使用协议》")
]
# Outputs: a status message plus the converted audio as (sample_rate, ndarray).
outputs = [
    "text",
    gr.outputs.Audio(type="numpy")
]
# One pre-filled example row matching the three inputs above (terms unchecked,
# so running it as-is returns the "please accept the agreement" message).
example = [
    ["./raw/大手拉小手.wav", 0, False]
]
# Markdown shown above the interface linking to the model usage agreement.
des = """
## 在使用此模型前请阅读[AI阿夸模型使用协议](https://huggingface.co/spaces/DoNotSelect/AI-minato_aqua/blob/main/terms.md)
"""
demo = gr.Interface(
    fn=vc_fn,
    inputs=inputs,
    outputs=outputs,
    layout="horizontal",
    theme="huggingface",
    description=des,
    examples=example,
    cache_examples=True  # runs the example at startup and caches its result
)
# Start the Gradio server only when executed as a script.
if __name__ == "__main__":
    demo.launch()
|