# test / app.py
# Source: Hugging Face Space by adnaan05 — "Update app.py" (commit f8294c1, verified)
import os
import gradio as gr
import whisper
from gtts import gTTS
import io
from openai import OpenAI # Import OpenAI for AI/ML API calls
# Base URL of the AI/ML API (an OpenAI-compatible endpoint).
base_url = "https://api.aimlapi.com/v1"

# SECURITY: never hard-code API keys in source. A real key was previously
# committed here in plain text and must be considered compromised — rotate it.
# The key is now read from the environment instead.
api_key = os.environ.get("AIML_API_KEY", "")

# OpenAI client pointed at the AI/ML API via the custom base URL.
api = OpenAI(api_key=api_key, base_url=base_url)

# Whisper "base" model, used for audio transcription in process_audio().
model = whisper.load_model("base")
def call_aiml_api(user_prompt, system_prompt="You are a helpful assistant."):
    """Send a single-turn chat completion request to the AI/ML API.

    Args:
        user_prompt: The user's message text.
        system_prompt: System instruction prepended to the conversation.

    Returns:
        The model's reply text, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If the underlying API request fails for any reason
            (chained to the original exception).
    """
    # Keep the try body minimal: only the network call can raise here.
    try:
        completion = api.chat.completions.create(
            model="mistralai/Mistral-7B-Instruct-v0.2",  # model hosted by AI/ML API
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
            max_tokens=256,
        )
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise RuntimeError(f"API request failed with error: {e}") from e
    return completion.choices[0].message.content.strip()
def process_audio(file_path):
    """Transcribe an uploaded audio file, get an AI reply, and speak it back.

    Pipeline: Whisper transcription -> AI/ML API chat completion -> gTTS
    speech synthesis written to "response.mp3".

    Args:
        file_path: Path to the uploaded audio file (Gradio filepath input).

    Returns:
        A (response_text, audio_path) tuple on success, or
        (error_message, None) if any stage fails.
    """
    try:
        # Load and transcribe the audio with the module-level Whisper model.
        audio = whisper.load_audio(file_path)
        user_prompt = model.transcribe(audio)["text"]
        # Ask the AI/ML API for a reply to the transcribed text.
        response_message = call_aiml_api(user_prompt)
        # Synthesize the reply and write the MP3 directly — no need for the
        # intermediate BytesIO buffer the previous version round-tripped through.
        tts = gTTS(response_message)
        tts.save("response.mp3")
        return response_message, "response.mp3"
    except Exception as e:
        # Top-level boundary for the Gradio callback: surface the error as text
        # instead of crashing the UI.
        return f"An error occurred: {e}", None
# Interface configurations (UI)
# Page title shown at the top of the Gradio app.
title = "Voice-to-Voice AI Chatbot with AI/ML API"
# Credit line rendered under the title (Markdown link supported).
description = "Developed by [Adnan Tariq](https://www.linkedin.com/in/adnaantariq/) with ❤️"
# Usage instructions rendered below the interface (Markdown).
article = "### Instructions\n1. Upload an audio file.\n2. Wait for the transcription.\n3. Listen to the chatbot's response."
# Assemble the Gradio UI: one audio-file input, paired text + audio outputs,
# all wired to process_audio.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # user supplies an audio file path
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    title=title,
    description=description,
    article=article,
    theme="dark",
    live=True,
)

# Start the web app.
iface.launch()