"""Gradio demo for Bark: turn a text prompt into audio in the browser."""

from typing import Optional, Tuple

import gradio as gr
import numpy as np
from bark import SAMPLE_RATE, generate_audio

# Bounds of signed 16-bit PCM, the format handed to the Gradio audio widget.
_INT16_INFO = np.iinfo(np.int16)


def _to_int16_pcm(waveform) -> np.ndarray:
    """Scale a float waveform to int16 PCM, clipping out-of-range samples.

    The waveform is assumed to be nominally in [-1.0, 1.0] (that is what the
    32767 scale factor implies). Any sample outside that range is clipped
    instead of being allowed to overflow during the integer cast.
    """
    scaled = np.asarray(waveform) * _INT16_INFO.max
    return np.clip(scaled, _INT16_INFO.min, _INT16_INFO.max).astype(np.int16)


def predict(text_prompt: Optional[str]) -> Tuple[int, np.ndarray]:
    """Generate audio for ``text_prompt`` with Bark.

    Args:
        text_prompt: Text entered by the user. ``None``, empty, or
            whitespace-only input produces empty audio without calling
            the model.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is a
        one-dimensional ``int16`` array for the empty case and the int16
        conversion of whatever ``generate_audio`` returns otherwise.
    """
    if not text_prompt or not text_prompt.strip():
        # Report the same sample rate as the normal path rather than a
        # hard-coded value that can disagree with bark's SAMPLE_RATE.
        return (SAMPLE_RATE, np.zeros(0, dtype=np.int16))

    audio_array = generate_audio(text_prompt)
    return (SAMPLE_RATE, _to_int16_pcm(audio_array))


title = "🐶 Bark"

description = """
Bark is a transformer-based text-to-audio model created by [Suno](https://suno.ai/). Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. The model can also produce nonverbal communications like laughing, sighing and crying.
"""

article = """

## 🌎 Foreign Language

Bark supports various languages out-of-the-box and automatically determines language from input text. When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.

Try the prompt:

```
Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.
```

## 🤭 Non-Speech Sounds

Below is a list of some known non-speech sounds, but we are finding more every day. Please let us know if you find patterns that work particularly well on Discord!

* [laughter]
* [laughs]
* [sighs]
* [music]
* [gasps]
* [clears throat]
* — or ... for hesitations
* ♪ for song lyrics
* capitalization for emphasis of a word
* MAN/WOMAN: for bias towards speaker

Try the prompt:

```
" [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪."
```

## 🎶 Music

Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.

Try the prompt:

```
♪ In the jungle, the mighty jungle, the lion barks tonight ♪
```

## 👥 Speaker Prompts

You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. Please note that these are not always respected, especially if a conflicting audio history prompt is given.

Try the prompt:

```
WOMAN: I would like an oatmilk latte please.
MAN: Wow, that's expensive!
```

## Details

Bark model by [Suno](https://suno.ai/), including official [code](https://github.com/suno-ai/bark/tree/main) and model weights. Gradio demo by 🤗 Hugging Face.

Bark is licensed under a non-commercial license: CC-BY 4.0 NC.

"""

# Each inner list is one example row: a single value for the one text input.
examples = [
    ["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe."],
    ["Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible."],
    ["♪ In the jungle, the mighty jungle, the lion barks tonight ♪"],
    ["WOMAN: I would like an oatmilk latte please. MAN: Wow, that's expensive!"],
]

# Build the UI and start serving; share=True is passed through to launch().
gr.Interface(
    fn=predict,
    inputs=[
        gr.Text(label="Input Text"),
    ],
    outputs=[
        gr.Audio(label="Generated Speech", type="numpy"),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch(share=True)