import whisper
import os
from gtts import gTTS
import gradio as gr
from groq import Groq
from datetime import datetime
import tempfile
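# Pipeline overview: microphone/file audio -> Whisper transcription -> Groq LLM reply -> gTTS speech,
# all wrapped in a Gradio interface that keeps a timestamped chat history.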
# Load a smaller Whisper model for faster processing
try:
    model = whisper.load_model("tiny")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None
# Set up Groq API client using environment variable
GROQ_API_TOKEN = os.getenv("GROQ_API")
if not GROQ_API_TOKEN:
    raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
client = Groq(api_key=GROQ_API_TOKEN)
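# Example (shell) of providing the key before launching the app; the value shown is a placeholder:
#   export GROQ_API="your-groq-api-key"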
# Initialize the chat history
chat_history = []
# Function to get the LLM response from Groq with timeout handling
def get_llm_response(user_input, role="detailed responder"):
    # Build a role-specific prompt so the model answers in the requested style
    if role == "expert":
        prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}"
    elif role == "good assistant":
        prompt = f"As a good assistant, provide a clear, concise, and helpful response: {user_input}"
    else:
        prompt = f"Provide a thorough and detailed response: {user_input}"
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],  # Send the role-specific prompt, not the raw input
            model="llama3-8b-8192",  # Groq model name; swap in another supported model if desired
            timeout=20  # Request timeout in seconds
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error during LLM response retrieval: {e}")
        return "Sorry, there was an error retrieving the response. Please try again."
# Function to convert text to speech using gTTS and handle temporary files
def text_to_speech(text):
    try:
        tts = gTTS(text)
        # Reserve a temp file name, then write the MP3 after the file handle is closed
        # (writing while NamedTemporaryFile still holds the handle can fail on Windows)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_audio = temp_file.name
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        print(f"Error generating TTS: {e}")
        return None
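# Optional helper (illustrative; not called anywhere in this app): the MP3s above are created
# with delete=False, so they accumulate on disk. Something like this could remove one once its
# audio has been served.
def cleanup_audio_file(path):
    try:
        if path and os.path.exists(path):
            os.remove(path)
    except OSError as e:
        print(f"Could not remove temp audio file {path}: {e}")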
# Main chatbot function to handle audio input and output with chat history
def chatbot(audio):
    if not model:
        return "Error: Whisper model is not available.", None
    if not audio:
        return "No audio provided. Please upload a valid audio file.", None
    try:
        # Step 1: Transcribe the audio using Whisper
        result = model.transcribe(audio)
        user_text = result.get("text", "")
        if not user_text.strip():
            return "Could not understand the audio. Please try speaking more clearly.", None
        # Get current timestamp
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Record the transcription in the chat history
        chat_history.append((timestamp, "User", user_text))
        # Step 2: Get LLM response from Groq
        response_text = get_llm_response(user_text)
        # Step 3: Convert the response text to speech
        output_audio = text_to_speech(response_text)
        # Append the latest interaction to the chat history
        chat_history.append((timestamp, "Chatbot", response_text))
        # Format the chat history for display with timestamps and clear labels
        formatted_history = "\n".join([f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history])
        # Return exactly two values to match the interface's two output components
        return formatted_history, output_audio
    except Exception as e:
        print(f"Error in chatbot function: {e}")
        return "Sorry, there was an error processing your request.", None
# Gradio interface for real-time interaction with chat history display
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Chat History"),  # Display chat history
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
    title="Voice to Voice Chatbot",
    description="Upload or record audio; the chatbot will transcribe it and reply with synthesized speech.",
    theme="default",
    css='''
    body {
        background-image: url("https://huggingface.co/spaces/abdullahzunorain/voice-to-voice-Chatbot/resolve/main/BG_1.jpg");
        background-size: cover;
        background-position: center;
        background-repeat: no-repeat;
        color: white;
        font-family: Arial, sans-serif;
    }
    .gradio-container {
        background-color: rgba(0, 0, 0, 0.6);
        padding: 20px;
        border-radius: 8px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    }
    h1, h2, p, .gradio-label {
        color: #FFD700; /* Gold color for labels and titles */
    }
    .gradio-button {
        background-color: #FFD700;
        color: black;
        border-radius: 4px;
        font-weight: bold;
    }
    .gradio-input {
        background-color: rgba(255, 255, 255, 0.9);
        border-radius: 4px;
    }
    '''
)
# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
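    # For a temporary public URL (a standard Gradio launch option), use: iface.launch(share=True)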