import os
import tempfile

import gradio as gr
import outetts
from outetts.version.v2.interface import _DEFAULT_SPEAKERS
import spaces
import torch


def get_available_speakers():
    """Return the names of the default speakers bundled with OuteTTS v2."""
    return list(_DEFAULT_SPEAKERS)


@spaces.GPU
def generate_tts(text, temperature, repetition_penalty, speaker_selection, reference_audio):
    """Synthesize speech for *text* and return it as a WAV file path.

    Args:
        text: The text to speak.
        temperature: Sampling temperature passed to the generation config.
        repetition_penalty: Repetition penalty passed to the generation config.
        speaker_selection: Name of a default speaker; ignored when
            ``reference_audio`` is given. A falsy value or the string
            ``"None"`` means "no speaker".
        reference_audio: Optional path to an audio file to build a speaker
            from. Takes precedence over ``speaker_selection``.

    Returns:
        A ``(audio_path, error_message)`` tuple. Exactly one of the two is
        ``None``: the path on failure, the message on success.
    """
    # Reject empty input before paying for model loading on the GPU.
    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    model_config = outetts.HFModelConfig_v2(
        model_path="OuteAI/OuteTTS-0.3-1B",
        tokenizer_path="OuteAI/OuteTTS-0.3-1B",
        dtype=torch.bfloat16,
        device="cuda",
    )
    interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)

    try:
        if reference_audio:
            speaker = interface.create_speaker(reference_audio)
        elif speaker_selection and speaker_selection != "None":
            speaker = interface.load_default_speaker(speaker_selection)
        else:
            speaker = None

        gen_cfg = outetts.GenerationConfig(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=4096,
            speaker=speaker,
        )
        output = interface.generate(config=gen_cfg)
        if output.audio is None:
            raise ValueError("Audio generation failed. Please try again.")

        # Use a unique file per request: a shared "output.wav" would be
        # overwritten when several users generate audio at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        output.save(output_path)
        return output_path, None
    except Exception as e:
        # UI boundary: surface the failure in the status box instead of
        # crashing the request.
        return None, str(e)


# Custom CSS for 3D effect and modern UI
custom_css = """
.container {
    background: linear-gradient(145deg, #f0f0f0, #ffffff);
    border-radius: 20px;
    box-shadow: 20px 20px 60px #bebebe, -20px -20px 60px #ffffff;
    padding: 2rem;
}
.title {
    font-size: 2.5rem;
    text-align: center;
    background: linear-gradient(45deg, #2196F3, #00BCD4);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 2rem;
}
.radio-group {
    display: grid;
    grid-template-columns: repeat(auto-fill, minmax(150px, 1fr));
    gap: 1rem;
    margin: 1rem 0;
}
.control-panel {
    background: rgba(255, 255, 255, 0.9);
    border-radius: 15px;
    padding: 1.5rem;
    margin: 1rem 0;
}
"""

with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("# Voice Clone Multilingual TTS", elem_classes="title")

        with gr.Row():
            with gr.Column(scale=2):
                # Main input section with 3D effect
                with gr.Group(elem_classes="control-panel"):
                    text_input = gr.Textbox(
                        label="Enter Text",
                        placeholder="Type your text here...",
                        lines=3,
                    )

                    # Preselect "en_male_1" only if the library ships it;
                    # otherwise fall back to the first available voice.
                    speaker_choices = get_available_speakers()
                    if "en_male_1" in speaker_choices:
                        default_speaker = "en_male_1"
                    elif speaker_choices:
                        default_speaker = speaker_choices[0]
                    else:
                        default_speaker = None

                    speaker_radio = gr.Radio(
                        choices=speaker_choices,
                        value=default_speaker,
                        label="Choose Voice",
                        elem_classes="radio-group",
                    )

                    with gr.Row():
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.1,
                            label="Expression Level",
                        )
                        repetition_penalty = gr.Slider(
                            minimum=0.5,
                            maximum=2.0,
                            value=1.1,
                            label="Clarity",
                        )

                    reference_audio = gr.Audio(
                        label="Upload Voice Reference",
                        type="filepath",
                    )

                    submit_button = gr.Button(
                        "Generate Speech",
                        variant="primary",
                    )

            with gr.Column(scale=1):
                # Output section
                audio_output = gr.Audio(
                    label="Generated Audio",
                    type="filepath",
                )
                error_box = gr.Textbox(
                    label="Status",
                    visible=False,
                )

    submit_button.click(
        fn=generate_tts,
        inputs=[
            text_input,
            temperature,
            repetition_penalty,
            speaker_radio,
            reference_audio,
        ],
        outputs=[audio_output, error_box],
    ).then(
        # Show the status box only when there is an error message in it.
        fn=lambda x: gr.update(visible=bool(x)),
        inputs=[error_box],
        outputs=[error_box],
    )

demo.launch()