Spaces:
Runtime error
Runtime error
File size: 2,592 Bytes
e67043b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
from __future__ import annotations
from typing import TYPE_CHECKING
from gradio_client.client import Job
from gradio_tools.tools.gradio_tool import GradioTool
if TYPE_CHECKING:
import gradio as gr
# Human-readable language name -> language code. The code is interpolated
# into the "Speaker 0 (<code>)" preset name by BarkTextToSpeechTool.create_job.
# Insertion order matters: it determines the order of SUPPORTED_SPEAKERS.
SUPPORTED_LANGS = {
    "English": "en",
    "German": "de",
    "Spanish": "es",
    "French": "fr",
    "Hindi": "hi",
    "Italian": "it",
    "Japanese": "ja",
    "Korean": "ko",
    "Polish": "pl",
    "Portuguese": "pt",
    "Russian": "ru",
    "Turkish": "tr",
    "Chinese": "zh",
}

# Speaker names that create_job forwards to the Space unchanged.
VOICES = ["Unconditional", "Announcer"]

# Every value accepted as the "speaker" half of a query: the fixed voices
# first, followed by the language names in declaration order.
SUPPORTED_SPEAKERS = VOICES + list(SUPPORTED_LANGS)

# Hints for non-speech sounds; the list is embedded verbatim in the tool
# description so the calling LLM knows it may use them inside the text.
NON_SPEECH_TOKENS = [
    "[laughter]",
    "[laughs]",
    "[sighs]",
    "[music]",
    "[gasps]",
    "[clears throat]",
    "'♪' for song lyrics. Put ♪ on either side of the text",
    "'…' for hesitations",
]
class BarkTextToSpeechTool(GradioTool):
    """Tool for calling the Bark text-to-speech model.

    A query has the form ``"<text>|<speaker>"``. The part after the last
    ``|`` selects the speaker: one of ``VOICES`` or a language name from
    ``SUPPORTED_LANGS``. A missing or unrecognised speaker falls back to
    ``"Unconditional"``.
    """

    def __init__(
        self,
        name="BarkTextToSpeech",
        description=(
            "A tool for text-to-speech. Use this tool to convert text "
            "into sounds that sound like a human read it. Input will be two strings separated by a |: "
            "the first will be the text to read. The second will be the desired speaking language. "
            f"It MUST be one of the following choices {','.join(SUPPORTED_SPEAKERS)}. "
            f"Additionally, you can include the following non speech tokens: {NON_SPEECH_TOKENS}. "
            "The output will be the generated audio file."
        ),
        src="suno/bark",
        hf_token=None,
        duplicate=False,
    ) -> None:
        """Initialise the tool.

        All five arguments are forwarded positionally, unchanged, to
        ``GradioTool.__init__``; their exact semantics are defined there.

        Args:
            name: Tool name.
            description: Usage instructions for the tool; the default lists
                the accepted speakers and non-speech tokens.
            src: Defaults to ``"suno/bark"`` (presumably the Space to call --
                confirm against ``GradioTool``).
            hf_token: Optional token passed through to the base class.
            duplicate: Flag passed through to the base class.
        """
        super().__init__(name, description, src, hf_token, duplicate)

    @staticmethod
    def _resolve_speaker(requested: str) -> str:
        """Map a user-supplied speaker string to the value sent to the Space.

        Matching ignores case and surrounding whitespace so that inputs such
        as ``"english"`` or ``" Announcer "`` are honoured.

        Args:
            requested: Raw speaker text taken from the query.

        Returns:
            The canonical voice name, ``"Speaker 0 (<code>)"`` for a known
            language, or ``"Unconditional"`` when nothing matches.
        """
        wanted = requested.strip().casefold()
        for voice in VOICES:
            if voice.casefold() == wanted:
                return voice
        for language, code in SUPPORTED_LANGS.items():
            if language.casefold() == wanted:
                return f"Speaker 0 ({code})"
        return "Unconditional"

    def create_job(self, query: str) -> Job:
        """Split ``query`` into text and speaker and submit it to the client.

        Args:
            query: ``"<text>|<speaker>"``. Only the last ``|`` is treated as
                the separator, so the text itself may contain ``|``. Without
                any ``|`` the whole query is the text.

        Returns:
            The job returned by ``self.client.submit``.
        """
        # rpartition splits on the LAST "|" and reports via an empty
        # separator when there is none, so no exception handling is needed.
        text, separator, requested = query.rpartition("|")
        if not separator:
            text, requested = query, "Unconditional"
        speaker = self._resolve_speaker(requested)
        # NOTE(review): fn_index=3 is hard-coded; confirm it still points at
        # the intended endpoint of the Space.
        return self.client.submit(text, speaker, fn_index=3)

    def postprocess(self, output: str) -> str:
        """Return the job output unchanged."""
        return output

    def _block_input(self, gr) -> "gr.components.Component":
        """Return the input component: a ``gr.Textbox``."""
        return gr.Textbox()

    def _block_output(self, gr) -> "gr.components.Component":
        """Return the output component: a ``gr.Audio``."""
        return gr.Audio()
|