# Hugging Face Space status banner: Running on Zero
import os
import tempfile

import gradio as gr
import outetts
import spaces
import torch
# Speaker identifiers offered in the "Choose Voice" picker. The selected value
# is handed to ``interface.load_default_speaker`` by ``generate_tts``.
AVAILABLE_SPEAKERS = [
    "en_male_1", "en_male_2", "en_female_1", "en_female_2",
    "zh_male_1", "zh_male_2", "zh_female_1", "zh_female_2",
    "jp_male_1", "jp_male_2", "jp_female_1", "jp_female_2",
    "kr_male_1", "kr_male_2", "kr_female_1", "kr_female_2",
]


def get_available_speakers():
    """Return the selectable speaker identifiers as a new list.

    A copy is returned rather than ``AVAILABLE_SPEAKERS`` itself so that a
    caller appending to or reordering the result cannot alter the
    module-level list shared by every other caller.
    """
    return list(AVAILABLE_SPEAKERS)
# The Space banner reads "Running on Zero" (ZeroGPU); there a GPU is attached
# only while a function decorated with ``spaces.GPU`` is executing, and this
# function requests ``device="cuda"``.
@spaces.GPU
def generate_tts(text, temperature, repetition_penalty, speaker_selection, reference_audio):
    """Synthesize speech for ``text`` and return ``(wav_path, error_message)``.

    Exactly one element of the returned pair is ``None``: on success the first
    element is the path of the generated WAV file and the second is ``None``;
    on failure the first is ``None`` and the second is a non-empty message.
    No exception is propagated to the caller.

    Args:
        text: Text to speak. Empty or whitespace-only text is rejected.
        temperature: Forwarded to ``outetts.GenerationConfig``.
        repetition_penalty: Forwarded to ``outetts.GenerationConfig``.
        speaker_selection: Name passed to ``load_default_speaker``; ignored
            when falsy or equal to the string ``"None"``.
        reference_audio: Path of a reference recording. When truthy it takes
            precedence over ``speaker_selection`` and is passed to
            ``create_speaker``.
    """
    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    try:
        # Model construction lives inside the ``try`` so that load failures
        # are reported through the same (None, message) channel as
        # generation failures instead of escaping as an exception.
        model_config = outetts.HFModelConfig_v2(
            model_path="OuteAI/OuteTTS-0.3-1B",
            tokenizer_path="OuteAI/OuteTTS-0.3-1B",
            dtype=torch.bfloat16,
            device="cuda",
        )
        interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)

        if reference_audio:
            speaker = interface.create_speaker(reference_audio)
        elif speaker_selection and speaker_selection != "None":
            speaker = interface.load_default_speaker(speaker_selection)
        else:
            speaker = None

        gen_cfg = outetts.GenerationConfig(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=4096,
            speaker=speaker,
        )
        output = interface.generate(config=gen_cfg)
        if output.audio is None:
            raise ValueError("Audio generation failed. Please try again.")

        # A unique file per call: a fixed "output.wav" would be overwritten
        # by any other request running at the same time. ``delete=False``
        # because the caller reads the returned path after this returns.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
            output_path = handle.name
        output.save(output_path)
        return output_path, None
    except Exception as e:
        # UI boundary: every failure becomes a status message. Fall back to
        # repr() so an exception with an empty str() still yields a truthy
        # message (the status box is only shown for truthy values).
        return None, str(e) or repr(e)
# Stylesheet passed to ``gr.Blocks(css=...)``. The class selectors below are
# the ones attached to components through ``elem_classes`` in the layout.
custom_css = """
.container {
    background: linear-gradient(145deg, #f0f0f0, #ffffff);
    border-radius: 20px;
    box-shadow: 20px 20px 60px #bebebe, -20px -20px 60px #ffffff;
    padding: 2rem;
}
.title {
    font-size: 2.5rem;
    text-align: center;
    background: linear-gradient(45deg, #2196F3, #00BCD4);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 2rem;
}
.radio-group {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
    gap: 1rem;
    margin: 1rem 0;
}
.control-panel {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 15px;
    padding: 1.5rem;
    margin: 1rem 0;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}
.generate-button {
    background: linear-gradient(45deg, #2196F3, #00BCD4);
    color: white;
    border: none;
    padding: 1rem 2rem;
    border-radius: 8px;
    cursor: pointer;
    transition: transform 0.2s;
}
.generate-button:hover {
    transform: translateY(-2px);
}
"""
# Page layout: a wide input column (text, voice picker, sliders, reference
# upload, submit button) beside a narrow output column (audio player, status).
with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("# Voice Clone Multilingual TTS", elem_classes="title")

        with gr.Row():
            with gr.Column(scale=2):
                # Input panel.
                with gr.Group(elem_classes="control-panel"):
                    text_input = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type your text here...",
                        lines=3,
                    )
                    speaker_radio = gr.Radio(
                        choices=get_available_speakers(),
                        value="en_male_1",
                        label="Choose Voice",
                        elem_classes="radio-group",
                    )
                    with gr.Row():
                        # Shown as "Expression Level"; passed to
                        # generate_tts as ``temperature``.
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.1,
                            label="Expression Level",
                        )
                        # Shown as "Clarity"; passed to generate_tts as
                        # ``repetition_penalty``.
                        repetition_penalty = gr.Slider(
                            minimum=0.5,
                            maximum=2.0,
                            value=1.1,
                            label="Clarity",
                        )
                    # When a file is supplied here, generate_tts uses it
                    # instead of the voice chosen in the radio group.
                    reference_audio = gr.Audio(
                        label="Upload Voice Reference",
                        type="filepath",
                    )
                    submit_button = gr.Button(
                        "Generate Speech",
                        variant="primary",
                        elem_classes="generate-button",
                    )

            with gr.Column(scale=1):
                # Output panel.
                with gr.Group(elem_classes="control-panel"):
                    audio_output = gr.Audio(
                        label="Generated Audio",
                        type="filepath",
                    )
                    # Hidden until generate_tts reports an error message.
                    error_box = gr.Textbox(
                        label="Status",
                        visible=False,
                    )

    # The input order must match generate_tts's positional parameters.
    # After generation, the status box is shown only if it received a
    # non-empty message.
    submit_button.click(
        fn=generate_tts,
        inputs=[
            text_input,
            temperature,
            repetition_penalty,
            speaker_radio,
            reference_audio,
        ],
        outputs=[audio_output, error_box],
    ).then(
        fn=lambda message: gr.update(visible=bool(message)),
        inputs=[error_box],
        outputs=[error_box],
    )

demo.launch()