import whisper
import os
from gtts import gTTS
import gradio as gr
from groq import Groq
from datetime import datetime
import tempfile
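
# Assumed dependencies (not pinned in this file): openai-whisper, gtts, gradio,
# and groq. Whisper additionally needs the ffmpeg binary on PATH to decode audio.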
# Load a smaller Whisper model for faster processing
try:
    model = whisper.load_model("tiny")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None
# Set up Groq API client using environment variable
GROQ_API_TOKEN = os.getenv("GROQ_API")
if not GROQ_API_TOKEN:
    raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
client = Groq(api_key=GROQ_API_TOKEN)
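# Example setup (assumed): provide the key before launching, e.g.
#   export GROQ_API="your-key"
# or add GROQ_API as a secret in the Space settings.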
# Initialize the chat history
chat_history = []
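# Note: this module-level list is shared across all sessions of the app;
# per-user history would need gr.State, which this script does not use.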
# Function to get the LLM response from Groq with timeout handling
def get_llm_response(user_input, role="detailed responder"):
    if role == "expert":
        prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}"
    elif role == "good assistant":
        prompt = f"As a good assistant, provide a clear, concise, and helpful response: {user_input}"
    else:
        prompt = f"Provide a thorough and detailed response: {user_input}"
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],  # send the role-specific prompt, not the raw input
            model="llama3-8b-8192",  # replace with your desired model
            timeout=20,  # per-request timeout in seconds
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error during LLM response retrieval: {e}")
        return "Sorry, there was an error retrieving the response. Please try again."
# Function to convert text to speech using gTTS and handle temporary files
def text_to_speech(text):
    try:
        tts = gTTS(text)
        # delete=False keeps the file on disk so Gradio can serve it after the
        # context manager closes the handle
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_audio = temp_file.name
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        print(f"Error generating TTS: {e}")
        return None
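# Note: because the temp files are never deleted, each reply leaves an .mp3 in
# the temp directory; a cleanup pass (e.g. os.remove on old files) is omitted here.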
# Main chatbot function to handle audio input and output with chat history
def chatbot(audio):
    if not model:
        return "Error: Whisper model is not available.", None
    if not audio:
        return "No audio provided. Please upload a valid audio file.", None
    try:
        # Step 1: Transcribe the audio using Whisper
        result = model.transcribe(audio)
        user_text = result.get("text", "")
        if not user_text.strip():
            return "Could not understand the audio. Please try speaking more clearly.", None

        # Timestamp and record the user's side of the exchange
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        chat_history.append((timestamp, "User", user_text))

        # Step 2: Get the LLM response from Groq
        response_text = get_llm_response(user_text)

        # Step 3: Convert the response text to speech
        output_audio = text_to_speech(response_text)
        chat_history.append((timestamp, "Chatbot", response_text))

        # Format the chat history for display with timestamps and speaker labels
        formatted_history = "\n".join(
            f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history
        )
        # The interface declares two outputs (text + audio), so return exactly two values
        return formatted_history, output_audio
    except Exception as e:
        print(f"Error in chatbot function: {e}")
        return "Sorry, there was an error processing your request.", None
# Gradio interface for real-time interaction with chat history display
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Chat History"),  # Display chat history
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
    title="Voice to Voice Chatbot",
    description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
    theme="default",
    css='''
    body {
        background-image: url("https://huggingface.co/spaces/abdullahzunorain/voice-to-voice-Chatbot/resolve/main/BG_1.jpg");
        background-size: cover;
        background-position: center;
        background-repeat: no-repeat;
        color: white;
        font-family: Arial, sans-serif;
    }
    /* Disabled styling variants, kept for reference:
    .gradio-container {
        background-color: rgba(0, 0, 0, 0.6);
        padding: 20px;
        border-radius: 8px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    }
    h1, h2, p, .gradio-label {
        color: #FFD700;  Gold color for labels and titles
    }
    .gradio-button {
        background-color: #FFD700;
        color: black;
        border-radius: 4px;
        font-weight: bold;
    }
    .gradio-input {
        background-color: rgba(255, 255, 255, 0.9);
        border-radius: 4px;
    }
    */
    '''
)
# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
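# Run with `python app.py`; Gradio prints a local URL (and a public link if
# launch(share=True) is used) where the app can be opened in a browser.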