import io
import logging
import os

import gradio as gr
import librosa
import numpy as np
import soundfile
import torch

from inference.infer_tool import Svc

# Silence numba's verbose JIT logging pulled in by librosa.
logging.getLogger("numba").setLevel(logging.WARNING)

# Load the so-vits-svc model once at startup.
model_path = "logs/48k/aqua.pth"
config_path = "configs/config.json"
svc_model = Svc(model_path, config_path)


def vc_fn(input_audio, vc_transform, term):
    """Convert the uploaded audio to the target voice.

    Returns a (status message, (sample_rate, waveform)) tuple for Gradio.
    """
    if not term:
        # "Please read and agree to the AI Minato Aqua model usage agreement"
        return "请阅读并同意《AI阿夸模型使用协议》", None
    if input_audio is None:
        # "Please upload an audio file"
        return "请上传音频", None

    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate
    if duration > 30:
        # "Please upload audio shorter than 30 s; convert longer audio locally"
        return "请上传小于30s的音频,长音频的转换请在本地进行", None

    # Normalize integer PCM to float32 in [-1, 1], downmix to mono, and
    # resample to the 24 kHz rate expected by the feature extractor.
    audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 24000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=24000)
    print(audio.shape)

    # Write the preprocessed audio to an in-memory WAV for the inference tool.
    out_wav_path = io.BytesIO()
    soundfile.write(out_wav_path, audio, 24000, format="wav")
    out_wav_path.seek(0)

    # sid = sid_map[sid]
    sid = "aqua"
    out_audio, out_sr = svc_model.infer(sid, vc_transform, out_wav_path)
    _audio = out_audio.cpu().numpy()
    # The 48k model outputs audio at 48 kHz.
    return "Success", (48000, _audio)


inputs = [
    gr.inputs.Audio(source="upload"),
    gr.inputs.Number(default=0),
    # "I have read and agree to the AI Minato Aqua model usage agreement"
    gr.Checkbox(label="您已阅读并同意《AI阿夸模型使用协议》"),
]
outputs = [
    "text",
    gr.outputs.Audio(type="numpy"),
]
example = [
    ["./raw/大手拉小手.wav", 0, False],
]
# "Please read the AI Minato Aqua model usage agreement before using this model"
des = """
## 在使用此模型前请阅读[AI阿夸模型使用协议](https://huggingface.co/spaces/DoNotSelect/AI-minato_aqua/blob/main/terms.md)
"""

demo = gr.Interface(
    fn=vc_fn,
    inputs=inputs,
    outputs=outputs,
    layout="horizontal",
    theme="huggingface",
    description=des,
    examples=example,
    cache_examples=True,
)

if __name__ == "__main__":
    demo.launch()
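
# --- Local usage sketch (illustrative only; assumes "input.wav" is a short
# --- clip on disk and that soundfile is installed). vc_fn can be exercised
# --- from a Python shell without the Gradio UI, mirroring the (sample_rate,
# --- int16 array) tuple the Space passes in:
#
#   import soundfile as sf
#   data, sr = sf.read("input.wav", dtype="int16")  # integer PCM, like Gradio's numpy audio input
#   msg, result = vc_fn((sr, data), 0, True)        # transpose = 0 semitones, agreement accepted
#   if result is not None:
#       out_sr, out_audio = result
#       sf.write("converted.wav", out_audio, out_sr)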