"""Voice-to-voice chatbot: Whisper transcription -> AI/ML API chat -> gTTS reply audio."""

import io
import os

import gradio as gr
import whisper
from gtts import gTTS
from openai import OpenAI  # OpenAI-compatible client pointed at the AI/ML API

# AI/ML API endpoint configuration.
# SECURITY NOTE(review): the fallback key below was hardcoded in the original
# source and should be rotated; prefer setting AIML_API_KEY in the environment.
BASE_URL = "https://api.aimlapi.com/v1"
API_KEY = os.environ.get("AIML_API_KEY", "701b35863e6d4a7b81bdcad2e6f3c880")

# Initialize the OpenAI client with the custom base URL and the API key.
api = OpenAI(api_key=API_KEY, base_url=BASE_URL)

# Whisper "base" model used for speech-to-text transcription.
model = whisper.load_model("base")


def call_aiml_api(user_prompt, system_prompt="You are a helpful assistant."):
    """Send a chat-completion request to the AI/ML API and return the reply text.

    Args:
        user_prompt: The user's message (here, the transcribed speech).
        system_prompt: System instruction steering the assistant's behavior.

    Returns:
        The assistant's reply with surrounding whitespace stripped.

    Raises:
        Exception: If the API request fails; the original error is chained.
    """
    try:
        completion = api.chat.completions.create(
            model="mistralai/Mistral-7B-Instruct-v0.2",  # model id on the AI/ML API
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
            max_tokens=256,
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        # Chain the cause so the full traceback survives for debugging.
        raise Exception(f"API request failed with error: {e}") from e


def process_audio(file_path):
    """Transcribe an uploaded audio file, query the chatbot, and voice its reply.

    Args:
        file_path: Path to the uploaded audio file (supplied by Gradio), or
            None when the live callback fires before a file is provided.

    Returns:
        A (response_text, audio_path) tuple. On failure the text carries the
        error message and the audio path is None.
    """
    try:
        if not file_path:
            # With live=True, Gradio may invoke the callback before any
            # audio exists; fail gracefully instead of crashing Whisper.
            return "No audio received. Please upload or record a clip.", None

        # Speech-to-text with Whisper.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        user_prompt = result["text"]

        # Ask the AI/ML API for a reply to the transcribed prompt.
        response_message = call_aiml_api(user_prompt)

        # Text-to-speech: gTTS can write the mp3 directly — no need for the
        # intermediate BytesIO round-trip the original used.
        tts = gTTS(response_message)
        tts.save("response.mp3")

        return response_message, "response.mp3"
    except Exception as e:
        # Surface the error in the UI instead of crashing the app.
        return f"An error occurred: {e}", None


# ---- Gradio UI configuration ----
title = "Voice-to-Voice AI Chatbot with AI/ML API"
description = "Developed by [Adnan Tariq](https://www.linkedin.com/in/adnaantariq/) with ❤️"
article = "### Instructions\n1. Upload an audio file.\n2. Wait for the transcription.\n3. Listen to the chatbot's response."

iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # upload/record audio; callback receives a file path
    outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
    live=True,
    title=title,
    description=description,
    theme="dark",
    article=article,
)

# Launch only when run as a script, so the module can be imported safely.
if __name__ == "__main__":
    iface.launch()