# Spaces: Paused
import torch | |
from transformers import pipeline | |
import numpy as np | |
import gradio as gr | |
def _grab_best_device(use_gpu=True):
    """Return the name of the torch device to run on.

    Args:
        use_gpu: When falsy, the CPU is chosen even if CUDA devices exist.

    Returns:
        ``"cuda"`` if torch reports at least one CUDA device and ``use_gpu``
        is truthy, otherwise ``"cpu"``.
    """
    cuda_visible = torch.cuda.device_count() > 0
    return "cuda" if cuda_visible and use_gpu else "cpu"
# Resolve the inference device once at import time ("cuda" or "cpu").
device = _grab_best_device()

# Hub repository id of the checkpoint loaded by the pipeline below.
HUB_PATH = "ylacombe/vits_vctk_welsh_male"

# Pass the detected device to the pipeline. The integer ``device=0`` used
# before always requested CUDA ordinal 0, which ignored the value computed
# above and fails on hosts without a GPU.
pipe = pipeline("text-to-speech", model=HUB_PATH, device=device)

# Markdown rendered at the top of the demo page.
title = "# 🐶 VITS"

description = """
"""

# Speaker count from the model config; the speaker dropdown is built from it.
num_speakers = pipe.model.config.num_speakers
# Inference
def generate_audio(text, spkr_id):
    """Synthesise speech for ``text`` with the module-level ``pipe``.

    Args:
        text: Input passed as-is to the text-to-speech pipeline.
        spkr_id: Speaker selection forwarded to the model through the
            pipeline's ``forward_params`` under the key ``"spkr_id"``.
            ``None`` is forwarded unchanged.

    Returns:
        A ``(sampling_rate, audio)`` tuple, where ``audio`` is the pipeline's
        ``"audio"`` output after ``squeeze()``.
    """
    # NOTE(review): released transformers' VitsModel.forward names this
    # argument ``speaker_id``; confirm "spkr_id" matches the installed model.
    result = pipe(text, forward_params={"spkr_id": spkr_id})
    rate = result["sampling_rate"]
    waveform = result["audio"].squeeze()
    return (rate, waveform)
# Gradio blocks demo
with gr.Blocks() as demo_blocks:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        # First column: text prompt, speaker choice and the trigger button.
        with gr.Column():
            inp_text = gr.Textbox(
                label="Input Text",
                # The hint previously named "bark", a leftover from another
                # demo; this app serves a VITS model.
                info="What would you like VITS to synthesise?",
            )
            spkr = gr.Dropdown(
                list(range(num_speakers)),
                value=None,  # no speaker selected by default
                label="Speaker ID",
                info="Default: Unconditional Generation",
            )
            btn = gr.Button("Generate Audio!")
        # Second column: player for the generated waveform.
        with gr.Column():
            out_audio_vocos = gr.Audio(
                type="numpy",
                autoplay=False,
                label="Generated Audio",
                show_label=True,
            )
    # Must be registered inside the Blocks context so the event is attached
    # to this demo.
    btn.click(generate_audio, [inp_text, spkr], [out_audio_vocos])

demo_blocks.launch()