import whisper
import os
from gtts import gTTS
import gradio as gr
from groq import Groq
from datetime import datetime
import tempfile
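
# Assumed dependencies (not pinned in this file): openai-whisper, gtts, gradio,
# and groq. Whisper additionally needs the ffmpeg binary on PATH to decode audio.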
# Load a smaller Whisper model for faster processing
try:
    model = whisper.load_model("tiny")
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    model = None
# Set up Groq API client using environment variable
GROQ_API_TOKEN = os.getenv("GROQ_API")
if not GROQ_API_TOKEN:
    raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.")
client = Groq(api_key=GROQ_API_TOKEN)
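# Example setup (assumed): provide the key before launching, e.g.
#   export GROQ_API="your-key"
# or add GROQ_API as a secret in the Space settings.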
# Initialize the chat history
chat_history = []
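# Note: this module-level list is shared across all sessions of the app;
# per-user history would need gr.State, which this script does not use.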
# Function to get the LLM response from Groq with timeout handling
def get_llm_response(user_input, role="detailed responder"):
    if role == "expert":
        prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}"
    elif role == "good assistant":
        prompt = f"As a good assistant, provide a clear, concise, and helpful response: {user_input}"
    else:
        prompt = f"Provide a thorough and detailed response: {user_input}"
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],  # send the role-specific prompt, not the raw input
            model="llama3-8b-8192",  # replace with your desired model
            timeout=20,  # per-request timeout in seconds
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        print(f"Error during LLM response retrieval: {e}")
        return "Sorry, there was an error retrieving the response. Please try again."
# Function to convert text to speech using gTTS and handle temporary files
def text_to_speech(text):
    try:
        tts = gTTS(text)
        # delete=False keeps the file on disk so Gradio can serve it after the
        # context manager closes the handle
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            output_audio = temp_file.name
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        print(f"Error generating TTS: {e}")
        return None
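# Note: because the temp files are never deleted, each reply leaves an .mp3 in
# the temp directory; a cleanup pass (e.g. os.remove on old files) is omitted here.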
# Main chatbot function to handle audio input and output with chat history
def chatbot(audio):
    if not model:
        return "Error: Whisper model is not available.", None
    if not audio:
        return "No audio provided. Please upload a valid audio file.", None
    try:
        # Step 1: Transcribe the audio using Whisper
        result = model.transcribe(audio)
        user_text = result.get("text", "")
        if not user_text.strip():
            return "Could not understand the audio. Please try speaking more clearly.", None

        # Timestamp and record the user's side of the exchange
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        chat_history.append((timestamp, "User", user_text))

        # Step 2: Get the LLM response from Groq
        response_text = get_llm_response(user_text)

        # Step 3: Convert the response text to speech
        output_audio = text_to_speech(response_text)
        chat_history.append((timestamp, "Chatbot", response_text))

        # Format the chat history for display with timestamps and speaker labels
        formatted_history = "\n".join(
            f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history
        )
        # The interface declares two outputs (text + audio), so return exactly two values
        return formatted_history, output_audio
    except Exception as e:
        print(f"Error in chatbot function: {e}")
        return "Sorry, there was an error processing your request.", None
# Gradio interface for real-time interaction with chat history display
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Chat History"),  # Display chat history
        gr.Audio(type="filepath", label="Response Audio"),
    ],
    live=True,
    title="Voice to Voice Chatbot",
    description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.",
    theme="default",
    css='''
    body {
        background-image: url("https://huggingface.co/spaces/abdullahzunorain/voice-to-voice-Chatbot/resolve/main/BG_1.jpg");
        background-size: cover;
        background-position: center;
        background-repeat: no-repeat;
        color: white;
        font-family: Arial, sans-serif;
    }
    /* Disabled styling variants, kept for reference:
    .gradio-container {
        background-color: rgba(0, 0, 0, 0.6);
        padding: 20px;
        border-radius: 8px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
    }
    h1, h2, p, .gradio-label {
        color: #FFD700;  Gold color for labels and titles
    }
    .gradio-button {
        background-color: #FFD700;
        color: black;
        border-radius: 4px;
        font-weight: bold;
    }
    .gradio-input {
        background-color: rgba(255, 255, 255, 0.9);
        border-radius: 4px;
    }
    */
    '''
)
# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
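# Run with `python app.py`; Gradio prints a local URL (and a public link if
# launch(share=True) is used) where the app can be opened in a browser.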