# import whisper # import os # from gtts import gTTS # import gradio as gr # from groq import Groq # from datetime import datetime # import tempfile # # Load a smaller Whisper model for faster processing # try: # model = whisper.load_model("tiny") # except Exception as e: # print(f"Error loading Whisper model: {e}") # model = None # # Set up Groq API client using environment variable # GROQ_API_TOKEN = os.getenv("GROQ_API") # if not GROQ_API_TOKEN: # raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.") # client = Groq(api_key=GROQ_API_TOKEN) # # Initialize the chat history # chat_history = [] # # Function to get the LLM response from Groq with timeout handling # def get_llm_response(user_input, role="detailed responder"): # prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}" if role == "expert" else \ # f"As a good assistant, provide a clear, concise, and helpful response: {user_input}" if role == "good assistant" else \ # f"Provide a thorough and detailed response: {user_input}" # try: # chat_completion = client.chat.completions.create( # messages=[{"role": "user", "content": user_input}], # model="llama3-8b-8192", # Replace with your desired model # timeout=20 # Increased timeout to 20 seconds # ) # return chat_completion.choices[0].message.content # except Exception as e: # print(f"Error during LLM response retrieval: {e}") # return "Sorry, there was an error retrieving the response. Please try again." # # Function to convert text to speech using gTTS and handle temporary files # def text_to_speech(text): # try: # tts = gTTS(text) # with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file: # output_audio = temp_file.name # tts.save(output_audio) # return output_audio # except Exception as e: # print(f"Error generating TTS: {e}") # return None # # Main chatbot function to handle audio input and output with chat history # def chatbot(audio): # if not model: # return "Error: Whisper model is not available.", None, chat_history # if not audio: # return "No audio provided. Please upload a valid audio file.", None, chat_history # try: # # Step 1: Transcribe the audio using Whisper # result = model.transcribe(audio) # user_text = result.get("text", "") # if not user_text.strip(): # return "Could not understand the audio. Please try speaking more clearly.", None, chat_history # # Get current timestamp # timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") # # Display transcription in chat history # chat_history.append((timestamp, "User", user_text)) # # Step 2: Get LLM response from Groq # response_text = get_llm_response(user_text) # # Step 3: Convert the response text to speech # output_audio = text_to_speech(response_text) # # Append the latest interaction to the chat history # chat_history.append((timestamp, "Chatbot", response_text)) # # Format the chat history for display with timestamps and clear labels # formatted_history = "\n".join([f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history]) # return formatted_history, output_audio, chat_history # except Exception as e: # print(f"Error in chatbot function: {e}") # return "Sorry, there was an error processing your request.", None, chat_history # # Gradio interface for real-time interaction with chat history display # iface = gr.Interface( # fn=chatbot, # inputs=gr.Audio(type="filepath"), # outputs=[ # gr.Textbox(label="Chat History", lines=10, interactive=False), # Display chat history # gr.Audio(type="filepath", label="Response Audio"), # ], # live=True, # title="Voice to Voice Chatbot", # description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.", # theme="default", # css=''' # body { # background-image: url("https://huggingface.co/spaces/abdullahzunorain/voice-to-voice-Chatbot/resolve/main/BG_1.jpg"); # background-size: cover; # background-position: center; # background-repeat: no-repeat; # color: white; # font-family: 'Helvetica Neue', sans-serif; # } # .gradio-container { # background-color: rgba(0, 0, 0, 0.7); # padding: 20px; # border-radius: 10px; # box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5); # } # h1, h2, p, .gradio-label { # color: #FFD700; /* Gold color for labels and titles */ # text-align: center; # } # .gradio-button { # background-color: #FFD700; # color: black; # border-radius: 5px; # font-weight: bold; # transition: background-color 0.3s, transform 0.2s; # } # .gradio-button:hover { # background-color: #FFC107; /* Lighter gold on hover */ # transform: scale(1.05); # } # .gradio-input { # background-color: rgba(255, 255, 255, 0.9); # border-radius: 4px; # border: 2px solid #FFD700; /* Gold border */ # } # .gradio-audio { # border: 2px solid #FFD700; /* Gold border for audio */ # } # ''' # ) # # Launch the Gradio app # if __name__ == "__main__": # iface.launch() import whisper import os from gtts import gTTS import gradio as gr from groq import Groq from datetime import datetime import tempfile # Load a smaller Whisper model for faster processing try: model = whisper.load_model("tiny") except Exception as e: print(f"Error loading Whisper model: {e}") model = None # Set up Groq API client using environment variable GROQ_API_TOKEN = os.getenv("GROQ_API") if not GROQ_API_TOKEN: raise ValueError("Groq API token is missing. Set 'GROQ_API' in your environment variables.") client = Groq(api_key=GROQ_API_TOKEN) # Initialize the chat history chat_history = [] # Function to get the LLM response from Groq with timeout handling def get_llm_response(user_input, role="detailed responder"): prompt = f"As an expert, provide a detailed and knowledgeable response: {user_input}" if role == "expert" else \ f"As a good assistant, provide a clear, concise, and helpful response: {user_input}" if role == "good assistant" else \ f"Provide a thorough and detailed response: {user_input}" try: chat_completion = client.chat.completions.create( messages=[{"role": "user", "content": user_input}], model="llama3-8b-8192", # Replace with your desired model timeout=20 # Increased timeout to 20 seconds ) return chat_completion.choices[0].message.content except Exception as e: print(f"Error during LLM response retrieval: {e}") return "Sorry, there was an error retrieving the response. Please try again." # Function to convert text to speech using gTTS and handle temporary files def text_to_speech(text): try: tts = gTTS(text) with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file: output_audio = temp_file.name tts.save(output_audio) return output_audio except Exception as e: print(f"Error generating TTS: {e}") return None # Main chatbot function to handle audio input and output with chat history def chatbot(audio): if not model: return "Error: Whisper model is not available.", None, chat_history if not audio: return "No audio provided. Please upload a valid audio file.", None, chat_history try: # Step 1: Transcribe the audio using Whisper result = model.transcribe(audio) user_text = result.get("text", "") if not user_text.strip(): return "Could not understand the audio. Please try speaking more clearly.", None, chat_history # Get current timestamp timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") # Display transcription in chat history chat_history.append((timestamp, "User", user_text)) # Step 2: Get LLM response from Groq response_text = get_llm_response(user_text) # Step 3: Convert the response text to speech output_audio = text_to_speech(response_text) # Append the latest interaction to the chat history chat_history.append((timestamp, "Chatbot", response_text)) # Format the chat history for display with timestamps and clear labels formatted_history = "\n".join([f"[{time}] {speaker}: {text}" for time, speaker, text in chat_history]) return formatted_history, output_audio, chat_history except Exception as e: print(f"Error in chatbot function: {e}") return "Sorry, there was an error processing your request.", None, chat_history # Gradio interface for real-time interaction with chat history display iface = gr.Interface( fn=chatbot, inputs=gr.Audio(type="filepath"), outputs=[ gr.Textbox(label="Chat History"), # Display chat history gr.Audio(type="filepath", label="Response Audio"), ], live=True, title="Voice to Voice Chatbot", description="Upload your audio, and the chatbot will transcribe and respond to it with a synthesized response.", theme="default", css=''' body { background-image: url("https://huggingface.co/spaces/abdullahzunorain/voice-to-voice-Chatbot/resolve/main/BG_1.jpg"); background-size: cover; background-position: center; background-repeat: no-repeat; color: white; font-family: Arial, sans-serif; } # .gradio-container { # background-color: rgba(0, 0, 0, 0.6); # padding: 20px; # border-radius: 8px; # box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2); # } # h1, h2, p, .gradio-label { # color: #FFD700; /* Gold color for labels and titles */ # } # .gradio-button { # background-color: #FFD700; # color: black; # border-radius: 4px; # font-weight: bold; # } # .gradio-input { # background-color: rgba(255, 255, 255, 0.9); # border-radius: 4px; # } ''' ) # Launch the Gradio app if __name__ == "__main__": iface.launch()