"""Gradio demo: convert text to speech with a fine-tuned SpeechT5 model.

Loads the acoustic model, the HiFi-GAN vocoder and the text processor once
at import time, then serves a small web UI whose single callback is
``text_to_speech``.
"""

# Install necessary libraries (if not already installed):
#   pip install gradio transformers soundfile torch

import tempfile
from typing import Optional

import gradio as gr
import soundfile as sf
import torch
from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor

# Sample rate (Hz) written into the WAV header of every generated clip.
SAMPLE_RATE = 16000

# Load the fine-tuned model, the vocoder, and the processor.
model = SpeechT5ForTextToSpeech.from_pretrained("krishna195/speecht5_krishna_finatuned")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")

# NOTE(review): this is a *random* placeholder embedding, drawn once per
# process start, so the voice differs between restarts and is unlikely to
# match any real speaker. Replace it with actual speaker embeddings
# (shape (1, 512)) for usable output.
speaker_embeddings = torch.randn(1, 512)


def text_to_speech(input_text: str) -> Optional[str]:
    """Synthesize ``input_text`` and return the path of the resulting WAV file.

    Args:
        input_text: Text to speak.

    Returns:
        Path to a newly created ``.wav`` file containing the speech, or
        ``None`` when ``input_text`` is empty or whitespace-only (Gradio
        then shows no audio instead of running the model on nothing).
    """
    if not input_text or not input_text.strip():
        return None

    # Tokenize the text into model inputs.
    inputs = processor(text=input_text, return_tensors="pt")

    # Clip to the model's maximum text length so that long inputs are
    # truncated rather than failing inside generate_speech.
    input_ids = inputs["input_ids"][..., : model.config.max_text_positions]

    # Generate a waveform with the acoustic model and the vocoder.
    speech = model.generate_speech(input_ids, speaker_embeddings, vocoder=vocoder)

    # Write to a unique temporary file per call: a single fixed filename
    # could be overwritten by another request before Gradio has read it.
    # delete=False because the file must outlive this function; cleanup is
    # left to the system temp directory.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    sf.write(output_file, speech.numpy(), SAMPLE_RATE)

    # Gradio plays the audio from the returned file path.
    return output_file


# Build the Gradio UI.
iface = gr.Interface(
    fn=text_to_speech,
    inputs="text",
    outputs="audio",
    title="Text to Speech Converter",
    description="Enter text and convert it into speech using a fine-tuned SpeechT5 model.",
    examples=[
        ["Hello, how are you doing today?"],
        ["Speech synthesis is amazing with deep learning models."],
        ["TensorFlow and PyTorch are powerful machine learning frameworks."],
    ],
)

# Launch the Gradio app.
iface.launch()