"""Gradio front end for voice cloning and text-to-speech generation."""

import os
import shutil

import gradio as gr

from coqui_tts import run_audio_generation_v1
from metaVoice import run_audio_generation_v2

os.environ["COQUI_TOS_AGREED"] = "1"

# Fixed locations shared with the generation back ends.
# NOTE(review): the back ends are presumed to read the speaker sample from
# SPEAKER_SAMPLE_PATH and to write their result to GENERATED_PATH; that
# contract lives in coqui_tts / metaVoice and is not visible here. Because
# the paths are fixed, concurrent requests would overwrite each other.
INPUT_DIR = "./tmp/audio/input_src/"
SPEAKER_SAMPLE_PATH = "./tmp/audio/input_src/0.wav"
OUTPUT_DIR = "audio"
GENERATED_PATH = "audio/output.wav"

# Used when the submitted speaker name is empty after sanitising.
DEFAULT_SPEAKER_NAME = "speaker"

# Maps each implemented dropdown choice to the function that generates audio.
_GENERATORS = {
    "Xtts_v2": run_audio_generation_v1,
    "metaVoice": run_audio_generation_v2,
}


def _safe_speaker_name(speaker_name):
    """Reduce a user-supplied speaker name to a single file name component.

    Everything up to the last path separator ("/" or "\\") is dropped so the
    name cannot point outside the output directory. An empty result falls
    back to DEFAULT_SPEAKER_NAME.
    """
    normalised = (speaker_name or "").replace("\\", "/")
    cleaned = os.path.basename(normalised).strip()
    return cleaned or DEFAULT_SPEAKER_NAME


def process_audio(input_text, speaker_audio, speaker_name, option_selected):
    """Generate speech for ``input_text`` and store it under the speaker's name.

    Args:
        input_text: Text passed to the selected generation function.
        speaker_audio: File path of the uploaded reference recording.
        speaker_name: Name used for the resulting ``audio/<name>.wav`` file.
        option_selected: Dropdown choice; must be a key of ``_GENERATORS``.

    Returns:
        Path of the generated WAV file.

    Raises:
        gr.Error: If the upload is missing, the option is not implemented, or
            any step of the generation fails. Gradio displays the message to
            the user; returning the message instead would hand a non-path
            string to the audio output component.
    """
    print(f"Received audio file: {speaker_audio}")
    if not speaker_audio or not os.path.exists(speaker_audio):
        raise gr.Error(
            "Error: The uploaded audio file is missing or invalid. Please upload again."
        )

    # Reject unsupported choices before touching the file system.
    generate = _GENERATORS.get(option_selected)
    if generate is None:
        raise gr.Error("The option is not implemented yet.")

    try:
        # Ensure necessary directories exist
        os.makedirs(INPUT_DIR, exist_ok=True)
        os.makedirs(OUTPUT_DIR, exist_ok=True)

        # Copy speaker audio to the required location
        shutil.copy(speaker_audio, SPEAKER_SAMPLE_PATH)

        generate(input_text)

        # Save the output audio under the speaker's name. os.replace
        # overwrites an earlier result for the same speaker on every platform.
        speaker_output_path = f"{OUTPUT_DIR}/{_safe_speaker_name(speaker_name)}.wav"
        os.replace(GENERATED_PATH, speaker_output_path)
    except Exception as e:
        # UI boundary: surface the failure as a visible Gradio error.
        raise gr.Error(str(e)) from e

    return speaker_output_path


# Gradio interface
# NOTE(review): row membership below was reconstructed; the original
# indentation was lost, so confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Audio Cloning and Text-to-Speech")

    with gr.Row():
        text_input = gr.Textbox(label="Input Text", placeholder="Enter your text here.")

    with gr.Row():
        speaker_audio = gr.Audio(
            label="Speaker Audio (to be cloned)", type='filepath', format='wav'
        )
        speaker_name = gr.Textbox(
            label="Speaker Name", placeholder="Enter the speaker's name."
        )

    option_selected = gr.Dropdown(
        choices=["Xtts_v2", "metaVoice", "more"], label="Select an Option"
    )
    submit_btn = gr.Button("Submit")
    output_audio = gr.Audio(label="Generated Audio Output", type='filepath')

    submit_btn.click(
        fn=process_audio,
        inputs=[text_input, speaker_audio, speaker_name, option_selected],
        outputs=output_audio,
    )

# Launch the Gradio app
demo.launch()