import os
import tempfile

import gradio as gr
import outetts
from outetts.version.v2.interface import _DEFAULT_SPEAKERS
import torch
import spaces


def get_available_speakers():
    """Return the keys of outetts' ``_DEFAULT_SPEAKERS`` mapping as a list.

    These are presumably the names accepted by ``load_default_speaker``
    (see ``generate_tts``) -- confirm against the outetts release in use.
    """
    return list(_DEFAULT_SPEAKERS.keys())


@spaces.GPU
def generate_tts(text, temperature, repetition_penalty, speaker_selection, reference_audio):
    """Synthesize speech for *text* and return ``(audio_path, error_message)``.

    Exactly one element of the returned pair is ``None``: on success the
    first element is the path of the generated WAV file and the second is
    ``None``; on failure the first is ``None`` and the second is the error
    text.

    Args:
        text: Text to synthesize. Blank or ``None`` input is rejected before
            any model work is done.
        temperature: Passed through to ``outetts.GenerationConfig``.
        repetition_penalty: Passed through to ``outetts.GenerationConfig``.
        speaker_selection: Name of a default speaker. A falsy value or the
            string ``"None"`` means no default speaker is loaded.
        reference_audio: Reference audio handed to
            ``interface.create_speaker``. When truthy it takes precedence
            over ``speaker_selection``.

    Returns:
        A ``(path, None)`` tuple on success or ``(None, message)`` on failure.
    """
    # Reject blank input up front rather than building the model and running
    # generation on nothing.
    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    # The model interface is built on every call, inside the GPU-decorated
    # function, exactly as the original code did.
    model_config = outetts.HFModelConfig_v2(
        model_path="OuteAI/OuteTTS-0.3-1B",
        tokenizer_path="OuteAI/OuteTTS-0.3-1B",
        dtype=torch.bfloat16,
        device="cuda",
    )
    interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)

    try:
        if reference_audio:
            # Custom speaker built from the supplied reference audio.
            speaker = interface.create_speaker(reference_audio)
        elif speaker_selection and speaker_selection != "None":
            # One of the selectable default speakers.
            speaker = interface.load_default_speaker(speaker_selection)
        else:
            # No speaker - random characteristics.
            speaker = None

        gen_cfg = outetts.GenerationConfig(
            text=text,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            max_length=4096,
            speaker=speaker,
        )
        output = interface.generate(config=gen_cfg)

        # Verify output
        if output.audio is None:
            raise ValueError(
                "Model failed to generate audio. \n"
                "This may be due to input length constraints or early EOS token."
            )

        # Write to a unique temp file so that concurrent requests cannot
        # overwrite each other's audio (a fixed "output.wav" is shared by
        # every caller). The file is not deleted here because the caller
        # still has to read it after this function returns.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        output.save(output_path)
        return output_path, None
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return None, str(e)


# Custom CSS for 3D styling
custom_css = """
.container {
    background: linear-gradient(145deg, #f3f4f6, #ffffff);
    border-radius: 20px;
    box-shadow: 10px 10px 20px #d1d1d1, -10px -10px 20px #ffffff;
    padding: 2rem;
    margin: 1rem;
    transition: all 0.3s ease;
}
.title {
    font-size: 2.5rem;
    font-weight: bold;
    color: #1a1a1a;
    text-align: center;
    margin-bottom: 2rem;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.1);
}
.input-group {
    background: #ffffff;
    border-radius: 15px;
    padding: 1.5rem;
    margin: 1rem 0;
    box-shadow: inset 5px 5px 10px #e0e0e0, inset -5px -5px 10px #ffffff;
}
.button-3d {
    background: linear-gradient(145deg, #3b82f6, #2563eb);
    color: white;
    border: none;
    padding: 0.8rem 1.5rem;
    border-radius: 10px;
    font-weight: bold;
    cursor: pointer;
    transition: all 0.3s ease;
    box-shadow: 5px 5px 10px #d1d1d1, -5px -5px 10px #ffffff;
}
.button-3d:hover {
    transform: translateY(-2px);
    box-shadow: 7px 7px 15px #d1d1d1, -7px -7px 15px #ffffff;
}
.slider-3d {
    height: 12px;
    border-radius: 6px;
    background: linear-gradient(145deg, #e6e7eb, #ffffff);
    box-shadow: inset 3px 3px 6px #d1d1d1, inset -3px -3px 6px #ffffff;
}
.error-box {
    background: #fee2e2;
    border-left: 4px solid #ef4444;
    padding: 1rem;
    border-radius: 8px;
    margin: 1rem 0;
}
"""