on1onmangoes
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,768 @@
import gradio as gr
from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
import anthropic
from pyht import Client as PyHtClient, TTSOptions
import dataclasses
import os
import numpy as np
from huggingface_hub import InferenceClient
import io
from pydub import AudioSegment
from dotenv import load_dotenv

import sambanova_gradio

from elevenlabs import ElevenLabs, VoiceSettings

## added by AL on 111124 to get SambaNova
import openai

## added by AL on 111424 to get Pinecone
from pinecone import Pinecone

## To get the semantic piece
from openai import OpenAI

# ADDED BY AL ON 111824 TO GET PYPDF FOR ANNIE
import requests
import json
import PyPDF2

# added by al on 120224 to clean the response.
import re

# added by al 120224 to improve speed of responses.
import random
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# needed by auto_reset_state / transcribe_function below
import time
import threading

# ADDED BY AL ON 112624 TO GET GRADIO CLIENT FOR STREAMING CHATBOT
from gradio_client import Client as gcClient
from huggingface_hub import login

# client = gcClient("BACKENDAPI2024/radarbackend11262024v11")
# result = client.predict(
#     messages=[],
#     user_message="Hello!!",
#     api_name="/api_get_response_on_enter"
# )
# print(result)

import whisper


load_dotenv()

account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

# GET API KEYS Added by AL on 111124
SAMBA_NOVA_API_KEY = os.environ.get("SAMBA_NOVA_API_KEY", None)
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY", None)
ELEVEN_DEFAULT_VOICE_ID = "ogvfya0XETMq7tFy4TO2"  # Replace with your desired voice ID

CLAUDE_CLIENT_API_KEY = os.environ.get("ANTHROPIC_KEY", None)
PLAYHT_SECRET_KEY = os.environ.get("PLAYHT_SECRET_KEY", None)
PLAYHT_USER_ID = os.environ.get("PLAYHT_USER_ID", None)

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", None)

# GET API KEYS Added by AL on 111524
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)

# wip added by al to make the open ai embedding work
openai_client2 = OpenAI(
    api_key=OPENAI_API_KEY,
)

# set pinecone index name
index_name = "radardataclean11122024"
# index = pc.Index(host="INDEX_HOST")

# added by Al on 111424
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(index_name)


# Added by al on 112624 to get the hf token for the gradio client
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    print("Please set your Hugging Face token in the environment variables.")
else:
    login(token=HF_TOKEN)

# Debug only: these print secrets to the Space logs; remove before deploying publicly.
print(CLAUDE_CLIENT_API_KEY)
print(PLAYHT_SECRET_KEY)
print(PLAYHT_USER_ID)
print(ELEVEN_LABS_API_KEY)
print(PINECONE_API_KEY)
print(OPENAI_API_KEY)
print(HF_TOKEN)
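
# A safer logging pattern (a sketch, not wired in; `_mask_secret` is a
# hypothetical helper, not part of the original app): print only a masked
# prefix of each key instead of the full secret.
def _mask_secret(value):
    """Return a short masked preview like 'sk-a...' for log output."""
    return (value[:4] + "...") if value else "MISSING"
# print(_mask_secret(OPENAI_API_KEY))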

# Whisper Speech-to-Text
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)

def auto_reset_state():
    time.sleep(5)
    return None, ""

def transcribe_function(stream, new_chunk):
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except TypeError:
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    if y is None or len(y) == 0:
        return stream, "", None

    # Normalize the incoming chunk to [-1, 1] before appending it to the stream.
    y = y.astype(np.float32)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y

    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")

    threading.Thread(target=auto_reset_state).start()

    return stream, full_text, full_text

def clear_transcription_state():
    return None, ""
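
# Minimal smoke test for the ASR pipeline above (a sketch; defined but never
# called, and the one-second-of-silence input is an assumption for illustration):
def _example_asr_smoke_test():
    silence = np.zeros(16000, dtype=np.float32)  # 1 s of silence at 16 kHz
    result = pipe_asr({"array": silence, "sampling_rate": 16000}, return_timestamps=False)
    return result["text"]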

if account_sid and auth_token:
    from twilio.rest import Client
    client = Client(account_sid, auth_token)

    token = client.tokens.create()

    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
else:
    rtc_configuration = None


@dataclasses.dataclass
class Clients:
    claude: anthropic.Anthropic
    play_ht: PyHtClient
    hf: InferenceClient
    eleven: ElevenLabs
    sambanova: openai.OpenAI  # set_api_key stores an openai.OpenAI instance here
    pc: Pinecone
    openai: OpenAI
    gc: gcClient
    grState: gr.State
    # whisper: whisper


## added by al for annie voice on 111124
# tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
#                          sample_rate=24000)

tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
                         sample_rate=48000)

# From Play HT APIs: https://docs.play.ht/reference/api-list-cloned-voices
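
# How tts_options is consumed later in response(), for reference (sketch only;
# `play_ht_client` and `process` are placeholders, not names defined here):
# pyht streams raw int16 PCM bytes at tts_options.sample_rate, which
# aggregate_chunks() below reassembles into whole-sample frames.
# for raw in play_ht_client.tts("hello", options=tts_options, voice_engine="Play3.0-mini-http"):
#     process(raw)  # raw is a bytes chunk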

def aggregate_chunks(chunks_iterator):
    leftover = b''  # Store incomplete bytes between chunks

    for chunk in chunks_iterator:
        # Combine with any leftover bytes from previous chunk
        current_bytes = leftover + chunk

        # Calculate complete samples
        n_complete_samples = len(current_bytes) // 2  # int16 = 2 bytes
        bytes_to_process = n_complete_samples * 2

        # Split into complete samples and leftover
        to_process = current_bytes[:bytes_to_process]
        leftover = current_bytes[bytes_to_process:]

        if to_process:  # Only yield if we have complete samples
            audio_array = np.frombuffer(to_process, dtype=np.int16).reshape(1, -1)
            yield audio_array


def audio_to_bytes(audio: tuple[int, np.ndarray]) -> bytes:
    audio_buffer = io.BytesIO()
    segment = AudioSegment(
        audio[1].tobytes(),
        frame_rate=audio[0],
        sample_width=audio[1].dtype.itemsize,
        channels=1,
    )
    segment.export(audio_buffer, format="mp3")
    return audio_buffer.getvalue()
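
# Hypothetical, never-called check of the two helpers above: pack a test tone
# into mp3 bytes, then re-aggregate deliberately odd-length byte chunks into
# whole int16 frames (no sample is ever split across yields).
def _example_audio_helpers():
    sr = 48000
    tone = (np.sin(2 * np.pi * 440 * np.arange(sr) / sr) * 32767).astype(np.int16)
    mp3_bytes = audio_to_bytes((sr, tone))  # same input shape response() uses
    raw = tone.tobytes()
    frames = list(aggregate_chunks(iter([raw[:1001], raw[1001:2000]])))
    return mp3_bytes, frames  # frames: list of (1, N) int16 arrays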
# AL MIGHT NOT BE USING THIS APPROACH ANYMORE.
def semantic_search(query):
    # Generate embeddings for the query using OpenAI
    # response = client_state.openai.Embedding.create(
    # response = client_state.openai.embeddings.create(
    response = openai_client2.embeddings.create(
        input=query,
        model="text-embedding-ada-002"  # Using text-embedding-ada-002 as ChatGPT-4 does not provide embeddings directly
    )
    # The v1 OpenAI client returns an object, not a dict, so use attribute access.
    query_embedding = response.data[0].embedding

    # Search the Pinecone index (recent Pinecone clients take a single `vector`;
    # older releases used `queries=[...]`).
    result = index.query(vector=query_embedding, top_k=100, include_metadata=True)

    # Format the results as a string
    results_str = ""
    for match in result.matches:
        metadata = match.metadata
        score = match.score
        results_str += f"ID: {match.id}, Score: {score}, Metadata: {metadata}\n"

    return results_str

## Updated version without the key selections
def set_api_key():
    try:
        claude_client = anthropic.Anthropic(api_key=CLAUDE_CLIENT_API_KEY)
        play_ht_client = PyHtClient(user_id=PLAYHT_USER_ID, api_key=PLAYHT_SECRET_KEY)
        # added by al on 111124 (fixed: pass the key variable, not the literal string)
        eleven_client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
        sambanova_client = openai.OpenAI(
            api_key=SAMBA_NOVA_API_KEY,
            base_url="https://api.sambanova.ai/v1",
        )
        # added by al on 111424 for Pinecone
        pc_client = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

        # added by al on 111524 for openai embeddings
        openai_client = OpenAI(
            api_key=OPENAI_API_KEY,
        )
        # added by al on 112524 for gradio client for RAG
        # gradio_c = gcClient("BACKENDAPI2024/radarbackend11262024v11", hf_token=HF_TOKEN)
        gradio_c = gcClient("on1onmangoes/radarheyanniebackend112724", hf_token=HF_TOKEN)
        gradio_state = gr.State()  # fixed: a trailing comma here made this a tuple
        # whisper_model = whisper.load_model("turbo")
    except Exception:
        raise gr.Error("Invalid API keys. Please try again.")
    gr.Info("Successfully set Radar.", duration=3)
    return Clients(claude=claude_client, play_ht=play_ht_client,
                   hf=InferenceClient(), eleven=eleven_client, sambanova=sambanova_client,
                   pc=pc_client, openai=openai_client, gc=gradio_c, grState=gradio_state), gr.skip()


#------------------------------------- END OF ANTHROPIC STYLE)

#--- Added by AL on 111124 for Samba Nova Optimizations, commented out on 111724 to use the new prompt with context as well below
# system_message = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences. Today is November 18 2024 use this date for reference
# Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
# Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor—after all,
# laughter is the best app! Keep your responses brief and to the point, avoiding unnecessary details or tangents, unless they're hilariously relevant.
# Use a friendly and approachable tone to create a pleasant interaction, and don't shy away from a cheeky pun or two!
# Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
# Keep the responses short and sweet, and yet informational, insightful, no longer than 3 sentences. Provide addresses and dates.
# Invite users to ask follow-up questions or clarify their needs, fostering a conversational flow that's as smooth as butter on a hot pancake. Avoid repeating the same greeting.
# Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
# Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""
# client_prompt = system_message

# client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences. Today is November 18 2024 use this date for reference
# Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
# Always aim to provide clear and relevant information that directly addresses the user's query. First check for our preferred vendors in the system prompt.
# Use a friendly and approachable tone to create a pleasant interaction, and don't shy away from a cheeky pun or two!
# Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
# If no information is found, request additional information; don't provide negative responses or mention your limitations, so that this can be used for downstream tasks.
# Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor.
# Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
# Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""

# client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences using simple short sentences.
# DO NOT PRODUCE UNNECESSARY PUNCTUATION AS THIS TEXT WILL BE FED TO A TEXT TO VOICE SYSTEM. KEEP IT SIMPLE. Today is December 2 2024, use this date for reference.
# Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
# First check for our preferred vendors in the context.
# Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
# If no information is found, request additional information; don't provide negative responses or mention your limitations, so that this can be used for downstream tasks.
# Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor.
# Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
# Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""

client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences using simple short sentences. NO MORE THAN 2 SENTENCES.
DO NOT PRODUCE UNNECESSARY PUNCTUATION AS THIS TEXT WILL BE FED TO A TEXT TO VOICE SYSTEM. KEEP IT SIMPLE, SHORT AND SWEET. Today is December 2 2024, use this date for reference.
Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
First check for our preferred vendors in the context. FOR RADIO AND MUSIC QUESTIONS REFER TO WZZK 104.7 FOR THE MOST PART.
Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
If no information is found, request additional information; don't provide negative responses or mention your limitations, so that this can be used for downstream tasks.
Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor.
Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""

## ADDED BY AL ON 120224 TO CONVEY THE SYSTEMIC RESPONSE.
# (Currently unused; duplicates processing_responses below.)
voicebot_responses = [
    "Hang tight, I'm working on that for you!",
    "Give me a sec, I'll have it ready in no time!",
    "Just a moment, let me check that for you.",
    "Working on it, sit tight!",
    "Got it! Let me pull that up for you.",
    "On it! This won't take long.",
    "Hold on, I'm finding that for you.",
    "Let me grab that info for you real quick.",
    "One sec, I'm putting it all together!",
    "I'm on it! This should just take a moment."
]

# Predefined lists for random voicebot responses
processing_responses = [
    "Hang tight, I'm working on that for you!",
    "Give me a sec, I'll have it ready in no time!",
    "Just a moment, let me check that for you.",
    "Working on it, sit tight!",
    "Got it! Let me pull that up for you.",
    "On it! This won't take long.",
    "Hold on, I'm finding that for you.",
    "Let me grab that info for you real quick.",
    "One sec, I'm putting it all together!",
    "I'm on it! This should just take a moment."
]

greeting_responses = [
    "Hey there! Great to hear from you!",
    "Hi! How's it going?",
    "Hello! What's on your mind today?",
    "Hey! What can I help you with?",
    "Hi there! Always good to chat with you."
]

## -------------------------- Added by AL on 111724 to get the country music synopsis and client synopsis

# from datasets import load_dataset

# client_dataset_name = "on1onmangoes/SAMLONEv4_20241001145542"

# # Load the dataset
# dataset = load_dataset(client_dataset_name)

# # Initialize the context string
# context = ''

# # Assuming the dataset has a 'train' split
# # You can adjust this if there are other splits like 'validation' or 'test'
# data_split = dataset['train']

# # Inspect the column names to adjust the field names accordingly
# print("Column names:", data_split.column_names)

# These are the field names for the client data here:
#   Name         string
#   Category     string
#   Address      string
#   Phone        string
#   Description  string

# # Build the context string
# for example in data_split:
#     # Replace 'Title', 'Source', etc., with the actual field names from your dataset
#     name = example.get('Name', '')
#     category = example.get('Category', '')
#     address = example.get('Address', '')
#     phone = example.get('Phone', '')
#     description = example.get('Description', '')

#     # Concatenate the fields into the context string
#     context += f"Name: {name}\n"
#     context += f"Category: {category}\n"
#     context += f"Address: {address}\n"
#     context += f"Phone: {phone}\n"
#     context += f"Description: {description}\n\n"

# # Optionally, print a portion of the context to verify
# print(context[:1000])  # Print the first 1000 characters
## ---------------------------------------------------------------------------------------------------------------


# Added by Al on 111724 to add the client prompt
system_message = client_prompt
# system_message += "\n\n" + context

#------------- For PDF reading added by AL on 111824
# (PyPDF2 is already imported at the top of the file.)

def pdf_to_text(pdf_location):
    # Check if the location is a URL or a file path
    if pdf_location.startswith('http://') or pdf_location.startswith('https://'):
        # Fetch the PDF from the URL
        try:
            response = requests.get(pdf_location)
            response.raise_for_status()
            pdf_bytes = io.BytesIO(response.content)
        except requests.exceptions.RequestException as e:
            return f"Error fetching the PDF file from the URL: {e}", None
    else:
        # Check if the file exists at the given path
        if not os.path.exists(pdf_location):
            return "The file does not exist at the specified location.", None
        # Open the PDF file
        try:
            pdf_bytes = open(pdf_location, 'rb')
        except Exception as e:
            return f"Error opening the PDF file: {e}", None

    # Read the PDF file
    try:
        reader = PyPDF2.PdfReader(pdf_bytes)
        text = ""
        for page_num in range(len(reader.pages)):
            page = reader.pages[page_num]
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
        # Convert the text to JSON format
        text_json = json.dumps({"text": text})
        return text, text_json
    except Exception as e:
        return f"An error occurred while reading the PDF: {e}", None
    finally:
        # Close the file if it's a local file
        if not pdf_location.startswith('http://') and not pdf_location.startswith('https://'):
            pdf_bytes.close()
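
# The two call shapes pdf_to_text() accepts, for reference (placeholder paths,
# not files that ship with this Space):
# text, text_json = pdf_to_text("./content/example.pdf")
# text, text_json = pdf_to_text("https://example.com/doc.pdf")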

# FILE IS TOO BIG
# content_file_path = "./content/ANNIE111824.pdf"

# Causes hallucinations
# content_file_path_short = "ANNIE30TO57SHORT111824.pdf"

# content_file_path_clientartists = "./content/ANNIECLIENTSARTISTS111824.pdf"
# content, content_json = pdf_to_text(content_file_path_clientartists)

# ANNIECLIENTARTISTSUPERSHORT111824.pdf
content_file_supershort = "./content/ANNIECLIENTARTISTSUPERSHORT111824.pdf"
content, content_json = pdf_to_text(content_file_supershort)

print("Annie Content is -->")
print(content)


#------------------
# added by al on 112724 to clean the response from the gradio client api
def clean_response(response, user_message):
    """
    Cleans the response text by removing unwanted symbols and formatting issues,
    and ensures the response does not repeat the question.
    """
    if isinstance(response, (list, tuple)):  # Handle nested lists/tuples
        response = " ".join(map(str, response))

    # Remove backslashes, newline characters, and specified unwanted symbols
    response = re.sub(r"[\\\n\(\)\[\]\"']", " ", response)

    # Normalize punctuation spacing
    response = re.sub(r"\s([?.!,'](?:\s|$))", r"\1", response)

    # Remove question repetition from the response
    if response.lower().startswith(user_message.lower().strip()):
        response = response[len(user_message):].strip(",. ")

    # Replace multiple spaces with a single space
    response = re.sub(r"\s+", " ", response).strip()

    return response
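
# Illustrative spot-check (hypothetical strings, not executed): brackets and
# backslashes are stripped, whitespace collapses, and a leading echo of the
# user's question is dropped.
# clean_response("What time is it [roughly] \\ now ?", "What time is it")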

# Added by Al on 111824 to add the content prompt
# system_message += "\n\n" + content

print("the system message is -->")
print(system_message)


## Added by al on 12 02 24 to do the transcription locally

# New method uses the system message to summarize the client history upfront
def response(audio: tuple[int, np.ndarray], conversation_llm_format: list[dict],
             chatbot: list[dict], client_state: Clients):
    if not client_state:
        raise gr.Error("Please set your API keys first.")

    # THIS IS THE ORIGINAL PROMPT, UPDATING IT FOR BETTER RESULTS WITH CLAUDE HAIKU
    prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio)).text

    # Alternative ASR backends tried along the way:
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="distil-whisper/distil-large-v3").text
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="openai/whisper-large-v3-turbo").text
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="nyrahealth/CrisperWhisper").text
    # prompt = "Hey Annie how are you"
    # prompt = transcribe_function(client_state.grState, audio)
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="https://kttcvz41e0htmjpp.us-east-1.aws.endpoints.huggingface.cloud").text

    # ADDED BY AL TO USE THE SPEECH TO TEXT GRADIO CLIENT - not working
    # prompt = client_state.gc.predict(
    #     new_chunk=audio,
    #     api_name="/api_voice_to_text"
    # ).text

    print("the prompt is-->")
    print(prompt)

    # added by al on 111524
    query = prompt  # Use the transcribed text as the query for semantic search
    # prompt += "\n\n" + semantic_search(query)
    ## added by al on 111724 to add the context from the dataset directly to the query
    # (context append is commented out above, so the prompt is unchanged here)

    conversation_llm_format.append({"role": "user", "content": prompt})

    # added by al on 12 02 24 for removing deadspace
    # Determine if the prompt is a greeting or requires processing.
    # (Note: this is substring matching, so e.g. "hi" also matches inside longer words.)
    if any(greeting in prompt.lower() for greeting in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]):
        # Random greeting response for greeting-only prompts
        processing_message = random.choice(greeting_responses)
    else:
        # Random processing message for general prompts
        processing_message = random.choice(processing_responses)

    print("processing message -->", processing_message)

    # Generate audio for the processing response
    processing_audio_iterator = client_state.play_ht.tts(
        processing_message, options=tts_options, voice_engine="Play3.0-mini-http"
    )
    # aggregate_chunks already yields (1, N) int16 arrays, so no second
    # frombuffer/reshape pass is needed here.
    for audio_array in aggregate_chunks(processing_audio_iterator):
        yield (48000, audio_array, "mono")  # Send processing audio immediately

    # if llm == "claude_haiku":
    #     response = client_state.claude.messages.create(
    #         model="claude-3-5-haiku-20241022",
    #         max_tokens=512,
    #         system="You are Annie, a friendly and intelligent voice assistant specializing in New Country Music, Alabama, and WZZK 104.7. Your responses should be brief, engaging, and informative while maintaining a positive and upbeat tone.",
    #         messages=conversation_llm_format,
    #     )
    #     response_text = " ".join(block.text for block in response.content if getattr(block, "type", None) == "text")

    # elif llm == "meta8b_samba":
    #     response = client_state.sambanova.chat.completions.create(
    #         model='Meta-Llama-3.1-8B-Instruct',
    #         # ADDED BY AL ON 111824 TO INCREASE CONTEXT LENGTH
    #         # model='Meta-Llama-3.1-70B-Instruct',
    #         # model='Meta-Llama-3.1-405B-Instruct',
    #         # ADDED BY AL ON 111824 TO INCREASE LATENCY ON 3.2
    #         # model='Meta-Llama-3.2-1B-Instruct',
    #         # model='Meta-Llama-3.2-3B-Instruct',
    #         # model='Llama-3.2-11B-Vision-Instruct',
    #         # model='Llama-3.2-90B-Vision-Instruct',
    #         messages=[{"role": "system", "content": system_message}, {"role": "user", "content": conversation_llm_format}],
    #         # messages=[{"role": "system", "content": "You are a helpful assistant"}, {"role": "user", "content": "Hello"}],
    #         temperature=0.1,
    #         top_p=0.1
    #     )
    #     print("the response is-->")
    #     print(response)
    #     response_text = response.choices[0].message.content
    #     print("the response_text is-->")
    #     print(response_text)

    # this piece does not need to be uncommented out on 112624
    # response_text = " ".join(block.text for block in response.content if getattr(block, "type", None) == "text")
    # response_text = response_text.replace("WZZK", "W Zee Zee Kay")
    # response_text = get_sambanova_response(prompt)

    # added by al on 112624 for the gradio client output
    response = client_state.gc.predict(
        messages=[],
        # messages=[{"role": "system", "content": system_message}, {"role": "user", "content": conversation_llm_format}],
        user_message=prompt,
        api_name="/api_get_response_on_enter"
    )

    print("gradio client response -->")
    print(response)

    # The endpoint returns the updated chat history; the latest assistant turn
    # sits at [0][0][1].
    assistant_response = response[0][0][1]

    print("assistant response -->")
    print(assistant_response)

    # Sanitize and clean the response
    # assistant_response = clean_response(assistant_response, prompt)
    response_text = assistant_response

    # Update conversation histories
    conversation_llm_format.append({"role": "assistant", "content": response_text})
    chatbot.append({"role": "user", "content": prompt})
    chatbot.append({"role": "assistant", "content": response_text})
    # For older Gradio chatbots, the tuple format would be:
    # chatbot.append((prompt, response_text))

    # Yield the chat update once; a second identical yield here duplicated it.
    yield AdditionalOutputs(conversation_llm_format, chatbot)

    # This version commented out on 112724, though it works with Samba:
    # conversation_llm_format.append({"role": "assistant", "content": response_text})
    # chatbot.append({"role": "user", "content": prompt})
    # chatbot.append({"role": "assistant", "content": response_text})
    # yield AdditionalOutputs(conversation_llm_format, chatbot)

    # This version works for Play HT:
    # iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0")

    # voice_engine: The voice engine to use for the TTS request.
    #   Play3.0-mini-http (default): latest multilingual model, streaming audio over HTTP. (NOTE that it is Play, not PlayHT like previous voice engines)
    #   Play3.0-mini-ws: latest multilingual model, streaming audio over WebSockets. (NOTE that it is Play, not PlayHT like previous voice engines)
    #   PlayHT2.0-turbo: legacy English-only model, streaming audio over gRPC.
    iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0-mini-http")

    # aggregate_chunks already yields (1, N) int16 arrays
    for audio_array in aggregate_chunks(iterator):
        # yield (24000, audio_array, "mono")
        yield (48000, audio_array, "mono")

    # this version for Eleven Labs
    # yield client_state.eleven.text_to_speech.convert_as_stream(
    #     voice_id="pMsXgVXv3BLzUgSXRplE",
    #     optimize_streaming_latency="0",
    #     output_format="mp3_22050_32",
    #     text=response_text,
    #     voice_settings=VoiceSettings(
    #         stability=0.1,
    #         similarity_boost=0.3,
    #         style=0.2,
    #     ),
    # )

##-------------- Added by AL based on feedback from Claude -----------

with gr.Blocks() as demo:
    with gr.Group():
        with gr.Row():
            chatbot = gr.Chatbot(label="Conversation", type="messages")
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                with gr.Row():
                    set_key_button = gr.Button("Set Radar", variant="primary")
            with gr.Column(scale=5):
                audio = WebRTC(modality="audio", mode="send-receive",
                               label="Audio Stream",
                               rtc_configuration=rtc_configuration)

    client_state = gr.State(None)
    conversation_llm_format = gr.State([])
    set_key_button.click(set_api_key, inputs=[],
                         outputs=[client_state, set_key_button])
    audio.stream(
        ReplyOnPause(response),
        inputs=[audio, conversation_llm_format, chatbot, client_state],
        outputs=[audio]
    )
    audio.on_additional_outputs(lambda l, g: (l, g), outputs=[conversation_llm_format, chatbot])


if __name__ == "__main__":
    demo.launch()