"""Gradio app that splits an uploaded recording into vocals and accompaniment.

The input is resampled to 44.1 kHz, written to ``test.wav`` and handed to the
Demucs command-line separator (``mdx_extra_q`` model, two-stem vocals mode).
"""

import os
import subprocess

import gradio as gr
from scipy.io.wavfile import write
import spaces
import numpy as np
import librosa

# Sample rate the input is converted to before it is passed to Demucs.
TARGET_SAMPLE_RATE = 44100

# Fixed locations used for the intermediate input file and the Demucs output.
INPUT_WAV = "test.wav"
OUTPUT_DIR = "out"
VOCALS_PATH = "./out/mdx_extra_q/test/vocals.wav"
NO_VOCALS_PATH = "./out/mdx_extra_q/test/no_vocals.wav"


def _pcm_to_float32(samples):
    """Return *samples* as float32, scaling integer PCM into [-1.0, 1.0)."""
    samples = np.asarray(samples)
    if not np.issubdtype(samples.dtype, np.integer):
        return samples.astype(np.float32)

    limits = np.iinfo(samples.dtype)
    as_float = samples.astype(np.float64)
    if limits.min == 0:
        # Unsigned PCM is centred on the midpoint of its range, not on zero.
        midpoint = (limits.max + 1) / 2.0
        return ((as_float - midpoint) / midpoint).astype(np.float32)
    return (as_float / -float(limits.min)).astype(np.float32)


def resample_audio(audio, target_sample_rate):
    """Resample an audio clip to ``target_sample_rate``.

    Args:
        audio: ``(sample_rate, samples)`` tuple. ``samples`` is a 1-D array
            (mono) or a 2-D array laid out as ``(samples, channels)``, which
            is the layout ``scipy.io.wavfile.write`` expects.
        target_sample_rate: Desired sample rate in Hz.

    Returns:
        The input tuple unchanged when it is already at the target rate.
        Otherwise ``(target_sample_rate, samples)`` where ``samples`` is a
        float32 array in the same layout, scaled to the [-1.0, 1.0] range.
    """
    current_sample_rate, audio_data = audio
    if current_sample_rate == target_sample_rate:
        return audio

    samples = _pcm_to_float32(audio_data)
    # librosa resamples along the last axis by default, so put time last for
    # the call and restore the (samples, channels) layout afterwards.
    # Transposing is a no-op for 1-D (mono) input.
    resampled = librosa.resample(
        np.ascontiguousarray(samples.T),
        orig_sr=current_sample_rate,
        target_sr=target_sample_rate,
    )
    return target_sample_rate, np.ascontiguousarray(resampled.T)


@spaces.GPU
def inference(audio):
    """Separate ``audio`` into a vocal stem and an accompaniment stem.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by
            ``gr.Audio(type="numpy")``, or ``None`` when nothing was provided.

    Returns:
        Paths of the vocals WAV file and the no-vocals WAV file written by
        Demucs.

    Raises:
        gr.Error: If no audio was provided or the Demucs process fails.
    """
    if audio is None:
        raise gr.Error("Please provide an audio file to separate.")

    # Log the working directory; every path below is relative to it.
    print(os.getcwd(), flush=True)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    sample_rate, samples = resample_audio(audio, TARGET_SAMPLE_RATE)
    write(INPUT_WAV, sample_rate, samples)

    command = [
        "python3",
        "-m",
        "demucs.separate",
        "-n",
        "mdx_extra_q",
        "--two-stems=vocals",
        INPUT_WAV,
        "-o",
        OUTPUT_DIR,
    ]
    try:
        # check=True so a failed run cannot silently return stale output files.
        subprocess.run(command, check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(
            "Demucs separation failed; see the server log for details."
        ) from err

    return VOCALS_PATH, NO_VOCALS_PATH


title = "음성 분리"

demo = gr.Interface(
    inference,
    gr.Audio(type="numpy", label="Input"),
    [
        gr.Audio(type="filepath", label="음성"),
        gr.Audio(type="filepath", label="배경음"),
    ],
    title=title,
)

demo.queue(max_size=1)
demo.launch(debug=True)