# Install necessary libraries (if not already installed)
#!pip install gradio transformers sentencepiece soundfile torch

import tempfile

import gradio as gr
import soundfile as sf
import torch
from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor, SpeechT5HifiGan

# Hugging Face Hub identifiers for the three pretrained components.
TTS_CHECKPOINT = "krishna195/speecht5_krishna_finatuned"
VOCODER_CHECKPOINT = "microsoft/speecht5_hifigan"
PROCESSOR_CHECKPOINT = "microsoft/speecht5_tts"

# Loaded once at import time; text_to_speech reads these module-level objects.
model = SpeechT5ForTextToSpeech.from_pretrained(TTS_CHECKPOINT)
vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_CHECKPOINT)
processor = SpeechT5Processor.from_pretrained(PROCESSOR_CHECKPOINT)

# NOTE: placeholder only -- this is an unseeded random (1, 512) tensor drawn
# once at import time, not a real speaker profile, so its values differ on
# every run. Replace it with an actual speaker embedding for your voice.
speaker_embeddings = torch.randn(1, 512)

# Function to generate speech from text
def text_to_speech(input_text):
    """Synthesise speech for *input_text* and return the path of a WAV file.

    Args:
        input_text: The text to speak, as entered in the Gradio text input.

    Returns:
        Path of a newly created WAV file (written at 16000 Hz) containing
        the generated audio.

    Raises:
        gr.Error: If ``input_text`` is empty or contains only whitespace.
    """
    # Reject blank input up front so the user sees a readable message in the
    # UI instead of the model being run on text with no content.
    if not input_text or not input_text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # Tokenise the text into PyTorch tensors for the model.
    inputs = processor(text=input_text, return_tensors="pt")

    # Generate the waveform using the model and vocoder.
    speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # Give every call its own file: a single fixed filename would be
    # overwritten by the next request, so overlapping requests could clobber
    # each other's audio. delete=False keeps the file on disk after the handle
    # is closed, so it can be written and then read back by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        output_file = wav_file.name
    sf.write(output_file, speech.numpy(), 16000)

    # Return the path to the audio file for Gradio to play.
    return output_file

# Sample prompts for the UI; each inner list holds the value for the single
# text input.
example_prompts = [
    ["Hello, how are you doing today?"],
    ["Speech synthesis is amazing with deep learning models."],
    ["TensorFlow and PyTorch are powerful machine learning frameworks."],
]

# Wire text_to_speech into a text-in / audio-out Gradio interface.
iface = gr.Interface(
    fn=text_to_speech,
    inputs="text",
    outputs="audio",
    examples=example_prompts,
    title="Text to Speech Converter",
    description="Enter text and convert it into speech using a fine-tuned SpeechT5 model.",
)

# Start the Gradio app.
iface.launch()