"""Streamlit text-to-speech demo.

Text is turned into a mel spectrogram by a SpeechBrain Tacotron2 model,
vocoded into a waveform by a SpeechBrain HiFi-GAN model, written to a
16-bit PCM WAV file and played back in the browser.
"""
import os
from typing import Optional

import numpy as np
import streamlit as st
from scipy.io.wavfile import write
from speechbrain.pretrained import HIFIGAN, Tacotron2

# Sample rate handed to the WAV writer.
# NOTE(review): assumed to match the LJSpeech checkpoints loaded below --
# confirm against the model cards before changing either.
SAMPLE_RATE = 22050

# NOTE(review): this path is shared by every session of the app; concurrent
# users would overwrite each other's audio -- confirm whether that matters.
DEFAULT_OUTPUT_PATH = "output.wav"

_INT16_MAX = np.iinfo(np.int16).max  # 32767, full-scale value for 16-bit PCM


# Load TTS and vocoder models
@st.cache_resource  # Cache the models to avoid reloading
def load_models():
    """Load the Tacotron2 synthesizer and the HiFi-GAN vocoder.

    Returns:
        A ``(tacotron2, hifi_gan)`` tuple of the two loaded models.
    """
    tacotron2 = Tacotron2.from_hparams(
        source="speechbrain/tts-tacotron2-ljspeech",
        savedir="tmp_tts",
    )
    hifi_gan = HIFIGAN.from_hparams(
        source="speechbrain/tts-hifigan-ljspeech",
        savedir="tmp_vocoder",
    )
    return tacotron2, hifi_gan


tacotron2, hifi_gan = load_models()


def _to_int16_pcm(waveform):
    """Peak-normalize *waveform* and convert it to 16-bit PCM samples.

    Args:
        waveform: Array-like of floating-point audio samples.

    Returns:
        A NumPy ``int16`` array scaled so the loudest sample is full-scale.
        An all-zero (silent) input is returned as all zeros.

    Raises:
        ValueError: If the waveform is empty or contains NaN/inf samples.
    """
    samples = np.asarray(waveform)
    if samples.size == 0:
        raise ValueError("model produced an empty waveform")

    peak = np.max(np.abs(samples))
    if not np.isfinite(peak):
        raise ValueError("waveform contains non-finite samples")

    # Dividing by a zero peak would fill the array with NaN; silence is
    # already "normalized", so leave it untouched.
    if peak > 0:
        samples = samples / peak

    return (samples * _INT16_MAX).astype(np.int16)


# Text-to-Speech function
def text_to_speech(
    text: str,
    output_path: str = DEFAULT_OUTPUT_PATH,
    sample_rate: int = SAMPLE_RATE,
) -> Optional[str]:
    """Synthesize *text* to a WAV file.

    Args:
        text: The text to speak.
        output_path: Where the WAV file is written.
        sample_rate: Sample rate recorded in the WAV header.

    Returns:
        ``output_path`` on success, or ``None`` if anything failed (the
        error is shown in the Streamlit UI rather than raised).
    """
    # Broad catch is deliberate: this is the UI boundary, and any model or
    # I/O failure should surface as an on-page error instead of a traceback.
    try:
        # Generate mel spectrogram
        mel_output, _, _ = tacotron2.encode_text(text)

        # Generate waveform from mel spectrogram
        waveforms = hifi_gan.decode_batch(mel_output)

        # Convert waveform to numpy format; detach() is a no-op when the
        # tensor does not track gradients and avoids a failure when it does.
        # squeeze() drops the size-1 dimensions.
        # NOTE(review): presumably leaves a 1-D array for a single utterance -- confirm.
        waveform = waveforms.squeeze().detach().cpu().numpy()

        # Save waveform to a .wav file
        write(output_path, sample_rate, _to_int16_pcm(waveform))
        return output_path
    except Exception as e:
        st.error(f"Error during text-to-speech generation: {e}")
        return None


# Streamlit UI
st.title("Text-to-Speech Application")
st.write("Enter text below and convert it to speech!")

# Input field
text_input = st.text_area("Enter Text:", "Hello, welcome to the Text-to-Speech app!")

if st.button("Generate Speech"):
    if text_input.strip():
        output_audio = text_to_speech(text_input)
        if output_audio:
            st.audio(output_audio, format="audio/wav")
        else:
            st.error("Failed to generate audio. Please check the input text.")
    else:
        st.warning("Please enter some text to generate speech.")