on1onmangoes
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,768 @@
import gradio as gr
from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
import anthropic
from pyht import Client as PyHtClient, TTSOptions
import dataclasses
import os
import numpy as np
from huggingface_hub import InferenceClient
import io
from pydub import AudioSegment
from dotenv import load_dotenv

import sambanova_gradio

from elevenlabs import ElevenLabs, VoiceSettings

## added by AL on 111124 to get SambaNova
import openai

## added by AL on 111424 to get Pinecone
from pinecone import Pinecone

## To get the semantic piece
from openai import OpenAI

# ADDED BY AL ON 111824 TO GET PYPDF FOR ANNIE
import requests
import json
import PyPDF2

# added by al on 120224 to clean the response.
import re

# added by al 120224 to improve speed of responses.
import random
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# needed by auto_reset_state / transcribe_function below
import time
import threading

# ADDED BY AL ON 112624 TO GET GRADIO CLIENT FOR STREAMING CHATBOT
from gradio_client import Client as gcClient
from huggingface_hub import login

# client = gcClient("BACKENDAPI2024/radarbackend11262024v11")
# result = client.predict(
#     messages=[],
#     user_message="Hello!!",
#     api_name="/api_get_response_on_enter"
# )
# print(result)

import whisper


load_dotenv()

account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

# GET API KEYS Added by AL on 111124
SAMBA_NOVA_API_KEY = os.environ.get("SAMBA_NOVA_API_KEY", None)
ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY", None)
ELEVEN_DEFAULT_VOICE_ID = "ogvfya0XETMq7tFy4TO2"  # Replace with your desired voice ID

CLAUDE_CLIENT_API_KEY = os.environ.get("ANTHROPIC_KEY", None)
PLAYHT_SECRET_KEY = os.environ.get("PLAYHT_SECRET_KEY", None)
PLAYHT_USER_ID = os.environ.get("PLAYHT_USER_ID", None)

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", None)

# GET API KEYS Added by AL on 111524
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)

# wip added by al to make the open ai embedding work
openai_client2 = OpenAI(
    api_key=OPENAI_API_KEY,
)

# set pinecone index name
index_name = "radardataclean11122024"
# index = pc.Index(host="INDEX_HOST")

# added by Al on 111424
pc = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(index_name)


# Added by al on 112624 to get the hf token for the gradio client
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    print("Please set your Hugging Face token in the environment variables.")
else:
    login(token=HF_TOKEN)

# Debug only: these print secrets to the Space logs; remove before deploying publicly.
print(CLAUDE_CLIENT_API_KEY)
print(PLAYHT_SECRET_KEY)
print(PLAYHT_USER_ID)
print(ELEVEN_LABS_API_KEY)
print(PINECONE_API_KEY)
print(OPENAI_API_KEY)
print(HF_TOKEN)
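
# A safer logging pattern (a sketch, not wired in; `_mask_secret` is a
# hypothetical helper, not part of the original app): print only a masked
# prefix of each key instead of the full secret.
def _mask_secret(value):
    """Return a short masked preview like 'sk-a...' for log output."""
    return (value[:4] + "...") if value else "MISSING"
# print(_mask_secret(OPENAI_API_KEY))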

# Whisper Speech-to-Text
model_id = 'openai/whisper-large-v3'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
processor = AutoProcessor.from_pretrained(model_id)

pipe_asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=15,
    batch_size=16,
    torch_dtype=torch_dtype,
    device=device,
    return_timestamps=True
)

def auto_reset_state():
    time.sleep(5)
    return None, ""

def transcribe_function(stream, new_chunk):
    try:
        sr, y = new_chunk[0], new_chunk[1]
    except TypeError:
        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
        return stream, "", None

    if y is None or len(y) == 0:
        return stream, "", None

    # Normalize the incoming chunk to [-1, 1] before appending it to the stream.
    y = y.astype(np.float32)
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y

    if stream is not None and len(stream) > 0:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
    full_text = result.get("text", "")

    threading.Thread(target=auto_reset_state).start()

    return stream, full_text, full_text

def clear_transcription_state():
    return None, ""
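
# Minimal smoke test for the ASR pipeline above (a sketch; defined but never
# called, and the one-second-of-silence input is an assumption for illustration):
def _example_asr_smoke_test():
    silence = np.zeros(16000, dtype=np.float32)  # 1 s of silence at 16 kHz
    result = pipe_asr({"array": silence, "sampling_rate": 16000}, return_timestamps=False)
    return result["text"]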

if account_sid and auth_token:
    from twilio.rest import Client
    client = Client(account_sid, auth_token)

    token = client.tokens.create()

    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
else:
    rtc_configuration = None


@dataclasses.dataclass
class Clients:
    claude: anthropic.Anthropic
    play_ht: PyHtClient
    hf: InferenceClient
    eleven: ElevenLabs
    sambanova: openai.OpenAI  # set_api_key stores an openai.OpenAI instance here
    pc: Pinecone
    openai: OpenAI
    gc: gcClient
    grState: gr.State
    # whisper: whisper


## added by al for annie voice on 111124
# tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
#                          sample_rate=24000)

tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
                         sample_rate=48000)

# From Play HT APIs: https://docs.play.ht/reference/api-list-cloned-voices
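
# How tts_options is consumed later in response(), for reference (sketch only;
# `play_ht_client` and `process` are placeholders, not names defined here):
# pyht streams raw int16 PCM bytes at tts_options.sample_rate, which
# aggregate_chunks() below reassembles into whole-sample frames.
# for raw in play_ht_client.tts("hello", options=tts_options, voice_engine="Play3.0-mini-http"):
#     process(raw)  # raw is a bytes chunk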

def aggregate_chunks(chunks_iterator):
    leftover = b''  # Store incomplete bytes between chunks

    for chunk in chunks_iterator:
        # Combine with any leftover bytes from previous chunk
        current_bytes = leftover + chunk

        # Calculate complete samples
        n_complete_samples = len(current_bytes) // 2  # int16 = 2 bytes
        bytes_to_process = n_complete_samples * 2

        # Split into complete samples and leftover
        to_process = current_bytes[:bytes_to_process]
        leftover = current_bytes[bytes_to_process:]

        if to_process:  # Only yield if we have complete samples
            audio_array = np.frombuffer(to_process, dtype=np.int16).reshape(1, -1)
            yield audio_array


def audio_to_bytes(audio: tuple[int, np.ndarray]) -> bytes:
    audio_buffer = io.BytesIO()
    segment = AudioSegment(
        audio[1].tobytes(),
        frame_rate=audio[0],
        sample_width=audio[1].dtype.itemsize,
        channels=1,
    )
    segment.export(audio_buffer, format="mp3")
    return audio_buffer.getvalue()
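
# Hypothetical, never-called check of the two helpers above: pack a test tone
# into mp3 bytes, then re-aggregate deliberately odd-length byte chunks into
# whole int16 frames (no sample is ever split across yields).
def _example_audio_helpers():
    sr = 48000
    tone = (np.sin(2 * np.pi * 440 * np.arange(sr) / sr) * 32767).astype(np.int16)
    mp3_bytes = audio_to_bytes((sr, tone))  # same input shape response() uses
    raw = tone.tobytes()
    frames = list(aggregate_chunks(iter([raw[:1001], raw[1001:2000]])))
    return mp3_bytes, frames  # frames: list of (1, N) int16 arrays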
# AL MIGHT NOT BE USING THIS APPROACH ANYMORE.
def semantic_search(query):
    # Generate embeddings for the query using OpenAI
    # response = client_state.openai.Embedding.create(
    # response = client_state.openai.embeddings.create(
    response = openai_client2.embeddings.create(
        input=query,
        model="text-embedding-ada-002"  # Using text-embedding-ada-002 as ChatGPT-4 does not provide embeddings directly
    )
    # The v1 OpenAI client returns an object, not a dict, so use attribute access.
    query_embedding = response.data[0].embedding

    # Search the Pinecone index (recent Pinecone clients take a single `vector`;
    # older releases used `queries=[...]`).
    result = index.query(vector=query_embedding, top_k=100, include_metadata=True)

    # Format the results as a string
    results_str = ""
    for match in result.matches:
        metadata = match.metadata
        score = match.score
        results_str += f"ID: {match.id}, Score: {score}, Metadata: {metadata}\n"

    return results_str

## Updated version without the key selections
def set_api_key():
    try:
        claude_client = anthropic.Anthropic(api_key=CLAUDE_CLIENT_API_KEY)
        play_ht_client = PyHtClient(user_id=PLAYHT_USER_ID, api_key=PLAYHT_SECRET_KEY)
        # added by al on 111124 (fixed: pass the key variable, not the literal string)
        eleven_client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
        sambanova_client = openai.OpenAI(
            api_key=SAMBA_NOVA_API_KEY,
            base_url="https://api.sambanova.ai/v1",
        )
        # added by al on 111424 for Pinecone
        pc_client = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

        # added by al on 111524 for openai embeddings
        openai_client = OpenAI(
            api_key=OPENAI_API_KEY,
        )
        # added by al on 112524 for gradio client for RAG
        # gradio_c = gcClient("BACKENDAPI2024/radarbackend11262024v11", hf_token=HF_TOKEN)
        gradio_c = gcClient("on1onmangoes/radarheyanniebackend112724", hf_token=HF_TOKEN)
        gradio_state = gr.State()  # fixed: a trailing comma here made this a tuple
        # whisper_model = whisper.load_model("turbo")
    except Exception:
        raise gr.Error("Invalid API keys. Please try again.")
    gr.Info("Successfully set Radar.", duration=3)
    return Clients(claude=claude_client, play_ht=play_ht_client,
                   hf=InferenceClient(), eleven=eleven_client, sambanova=sambanova_client,
                   pc=pc_client, openai=openai_client, gc=gradio_c, grState=gradio_state), gr.skip()


#------------------------------------- END OF ANTHROPIC STYLE)

#--- Added by AL on 111124 for Samba Nova Optimizations, commented out on 111724 to use the new prompt with context as well below
# system_message = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences. Today is November 18 2024 use this date for reference
# Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
# Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor—after all,
# laughter is the best app! Keep your responses brief and to the point, avoiding unnecessary details or tangents, unless they're hilariously relevant.
# Use a friendly and approachable tone to create a pleasant interaction, and don't shy away from a cheeky pun or two!
# Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
# Keep the responses short and sweet, and yet informational, insightful, no longer than 3 sentences. Provide addresses and dates.
# Invite users to ask follow-up questions or clarify their needs, fostering a conversational flow that's as smooth as butter on a hot pancake. Avoid repeating the same greeting.
# Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
# Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""
# client_prompt = system_message

# client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences. Today is November 18 2024 use this date for reference
# Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
# Always aim to provide clear and relevant information that directly addresses the user's query. First check for our preferred vendors in the system prompt.
# Use a friendly and approachable tone to create a pleasant interaction, and don't shy away from a cheeky pun or two!
# Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
# If no information is found, request additional information; don't provide negative responses or mention your limitations, so that this can be used for downstream tasks.
# Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor.
# Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
# Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""

# client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences using simple short sentences.
# DO NOT PRODUCE UNNECESSARY PUNCTUATION AS THIS TEXT WILL BE FED TO A TEXT TO VOICE SYSTEM. KEEP IT SIMPLE. Today is December 2 2024, use this date for reference.
# Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
# First check for our preferred vendors in the context.
# Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
# If no information is found, request additional information; don't provide negative responses or mention your limitations, so that this can be used for downstream tasks.
# Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor.
# Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
# Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""

client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences using simple short sentences. NO MORE THAN 2 SENTENCES.
DO NOT PRODUCE UNNECESSARY PUNCTUATION AS THIS TEXT WILL BE FED TO A TEXT TO VOICE SYSTEM. KEEP IT SIMPLE, SHORT AND SWEET. Today is December 2 2024, use this date for reference.
Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
First check for our preferred vendors in the context. FOR RADIO AND MUSIC QUESTIONS REFER TO WZZK 104.7 FOR THE MOST PART.
Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
If no information is found, request additional information; don't provide negative responses or mention your limitations, so that this can be used for downstream tasks.
Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor.
Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""

## ADDED BY AL ON 120224 TO CONVEY THE SYSTEMIC RESPONSE.
# (Currently unused; duplicates processing_responses below.)
voicebot_responses = [
    "Hang tight, I'm working on that for you!",
    "Give me a sec, I'll have it ready in no time!",
    "Just a moment, let me check that for you.",
    "Working on it, sit tight!",
    "Got it! Let me pull that up for you.",
    "On it! This won't take long.",
    "Hold on, I'm finding that for you.",
    "Let me grab that info for you real quick.",
    "One sec, I'm putting it all together!",
    "I'm on it! This should just take a moment."
]

# Predefined lists for random voicebot responses
processing_responses = [
    "Hang tight, I'm working on that for you!",
    "Give me a sec, I'll have it ready in no time!",
    "Just a moment, let me check that for you.",
    "Working on it, sit tight!",
    "Got it! Let me pull that up for you.",
    "On it! This won't take long.",
    "Hold on, I'm finding that for you.",
    "Let me grab that info for you real quick.",
    "One sec, I'm putting it all together!",
    "I'm on it! This should just take a moment."
]

greeting_responses = [
    "Hey there! Great to hear from you!",
    "Hi! How's it going?",
    "Hello! What's on your mind today?",
    "Hey! What can I help you with?",
    "Hi there! Always good to chat with you."
]

## -------------------------- Added by AL on 111724 to get the country music synopsis and client synopsis

# from datasets import load_dataset

# client_dataset_name = "on1onmangoes/SAMLONEv4_20241001145542"

# # Load the dataset
# dataset = load_dataset(client_dataset_name)

# # Initialize the context string
# context = ''

# # Assuming the dataset has a 'train' split
# # You can adjust this if there are other splits like 'validation' or 'test'
# data_split = dataset['train']

# # Inspect the column names to adjust the field names accordingly
# print("Column names:", data_split.column_names)

# These are the field names for the client data here:
#   Name         string
#   Category     string
#   Address      string
#   Phone        string
#   Description  string

# # Build the context string
# for example in data_split:
#     # Replace 'Title', 'Source', etc., with the actual field names from your dataset
#     name = example.get('Name', '')
#     category = example.get('Category', '')
#     address = example.get('Address', '')
#     phone = example.get('Phone', '')
#     description = example.get('Description', '')

#     # Concatenate the fields into the context string
#     context += f"Name: {name}\n"
#     context += f"Category: {category}\n"
#     context += f"Address: {address}\n"
#     context += f"Phone: {phone}\n"
#     context += f"Description: {description}\n\n"

# # Optionally, print a portion of the context to verify
# print(context[:1000])  # Print the first 1000 characters
## ---------------------------------------------------------------------------------------------------------------


# Added by Al on 111724 to add the client prompt
system_message = client_prompt
# system_message += "\n\n" + context

#------------- For PDF reading added by AL on 111824
# (PyPDF2 is already imported at the top of the file.)

def pdf_to_text(pdf_location):
    # Check if the location is a URL or a file path
    if pdf_location.startswith('http://') or pdf_location.startswith('https://'):
        # Fetch the PDF from the URL
        try:
            response = requests.get(pdf_location)
            response.raise_for_status()
            pdf_bytes = io.BytesIO(response.content)
        except requests.exceptions.RequestException as e:
            return f"Error fetching the PDF file from the URL: {e}", None
    else:
        # Check if the file exists at the given path
        if not os.path.exists(pdf_location):
            return "The file does not exist at the specified location.", None
        # Open the PDF file
        try:
            pdf_bytes = open(pdf_location, 'rb')
        except Exception as e:
            return f"Error opening the PDF file: {e}", None

    # Read the PDF file
    try:
        reader = PyPDF2.PdfReader(pdf_bytes)
        text = ""
        for page_num in range(len(reader.pages)):
            page = reader.pages[page_num]
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
        # Convert the text to JSON format
        text_json = json.dumps({"text": text})
        return text, text_json
    except Exception as e:
        return f"An error occurred while reading the PDF: {e}", None
    finally:
        # Close the file if it's a local file
        if not pdf_location.startswith('http://') and not pdf_location.startswith('https://'):
            pdf_bytes.close()
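
# The two call shapes pdf_to_text() accepts, for reference (placeholder paths,
# not files that ship with this Space):
# text, text_json = pdf_to_text("./content/example.pdf")
# text, text_json = pdf_to_text("https://example.com/doc.pdf")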

# FILE IS TOO BIG
# content_file_path = "./content/ANNIE111824.pdf"

# Causes hallucinations
# content_file_path_short = "ANNIE30TO57SHORT111824.pdf"

# content_file_path_clientartists = "./content/ANNIECLIENTSARTISTS111824.pdf"
# content, content_json = pdf_to_text(content_file_path_clientartists)

# ANNIECLIENTARTISTSUPERSHORT111824.pdf
content_file_supershort = "./content/ANNIECLIENTARTISTSUPERSHORT111824.pdf"
content, content_json = pdf_to_text(content_file_supershort)

print("Annie Content is -->")
print(content)


#------------------
# added by al on 112724 to clean the response from the gradio client api
def clean_response(response, user_message):
    """
    Cleans the response text by removing unwanted symbols and formatting issues,
    and ensures the response does not repeat the question.
    """
    if isinstance(response, (list, tuple)):  # Handle nested lists/tuples
        response = " ".join(map(str, response))

    # Remove backslashes, newline characters, and specified unwanted symbols
    response = re.sub(r"[\\\n\(\)\[\]\"']", " ", response)

    # Normalize punctuation spacing
    response = re.sub(r"\s([?.!,'](?:\s|$))", r"\1", response)

    # Remove question repetition from the response
    if response.lower().startswith(user_message.lower().strip()):
        response = response[len(user_message):].strip(",. ")

    # Replace multiple spaces with a single space
    response = re.sub(r"\s+", " ", response).strip()

    return response
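
# Illustrative spot-check (hypothetical strings, not executed): brackets and
# backslashes are stripped, whitespace collapses, and a leading echo of the
# user's question is dropped.
# clean_response("What time is it [roughly] \\ now ?", "What time is it")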

# Added by Al on 111824 to add the content prompt
# system_message += "\n\n" + content

print("the system message is -->")
print(system_message)


## Added by al on 12 02 24 to do the transcription locally

# New method uses the system message to summarize the client history upfront
def response(audio: tuple[int, np.ndarray], conversation_llm_format: list[dict],
             chatbot: list[dict], client_state: Clients):
    if not client_state:
        raise gr.Error("Please set your API keys first.")

    # THIS IS THE ORIGINAL PROMPT, UPDATING IT FOR BETTER RESULTS WITH CLAUDE HAIKU
    prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio)).text

    # Alternative ASR backends tried along the way:
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="distil-whisper/distil-large-v3").text
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="openai/whisper-large-v3-turbo").text
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="nyrahealth/CrisperWhisper").text
    # prompt = "Hey Annie how are you"
    # prompt = transcribe_function(client_state.grState, audio)
    # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="https://kttcvz41e0htmjpp.us-east-1.aws.endpoints.huggingface.cloud").text

    # ADDED BY AL TO USE THE SPEECH TO TEXT GRADIO CLIENT - not working
    # prompt = client_state.gc.predict(
    #     new_chunk=audio,
    #     api_name="/api_voice_to_text"
    # ).text

    print("the prompt is-->")
    print(prompt)

    # added by al on 111524
    query = prompt  # Use the transcribed text as the query for semantic search
    # prompt += "\n\n" + semantic_search(query)
    ## added by al on 111724 to add the context from the dataset directly to the query
    # (context append is commented out above, so the prompt is unchanged here)

    conversation_llm_format.append({"role": "user", "content": prompt})

    # added by al on 12 02 24 for removing deadspace
    # Determine if the prompt is a greeting or requires processing.
    # (Note: this is substring matching, so e.g. "hi" also matches inside longer words.)
    if any(greeting in prompt.lower() for greeting in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]):
        # Random greeting response for greeting-only prompts
        processing_message = random.choice(greeting_responses)
    else:
        # Random processing message for general prompts
        processing_message = random.choice(processing_responses)

    print("processing message -->", processing_message)

    # Generate audio for the processing response
    processing_audio_iterator = client_state.play_ht.tts(
        processing_message, options=tts_options, voice_engine="Play3.0-mini-http"
    )
    # aggregate_chunks already yields (1, N) int16 arrays, so no second
    # frombuffer/reshape pass is needed here.
    for audio_array in aggregate_chunks(processing_audio_iterator):
        yield (48000, audio_array, "mono")  # Send processing audio immediately

    # if llm == "claude_haiku":
    #     response = client_state.claude.messages.create(
    #         model="claude-3-5-haiku-20241022",
    #         max_tokens=512,
    #         system="You are Annie, a friendly and intelligent voice assistant specializing in New Country Music, Alabama, and WZZK 104.7. Your responses should be brief, engaging, and informative while maintaining a positive and upbeat tone.",
    #         messages=conversation_llm_format,
    #     )
    #     response_text = " ".join(block.text for block in response.content if getattr(block, "type", None) == "text")

    # elif llm == "meta8b_samba":
    #     response = client_state.sambanova.chat.completions.create(
    #         model='Meta-Llama-3.1-8B-Instruct',
    #         # ADDED BY AL ON 111824 TO INCREASE CONTEXT LENGTH
    #         # model='Meta-Llama-3.1-70B-Instruct',
    #         # model='Meta-Llama-3.1-405B-Instruct',
    #         # ADDED BY AL ON 111824 TO INCREASE LATENCY ON 3.2
    #         # model='Meta-Llama-3.2-1B-Instruct',
    #         # model='Meta-Llama-3.2-3B-Instruct',
    #         # model='Llama-3.2-11B-Vision-Instruct',
    #         # model='Llama-3.2-90B-Vision-Instruct',
    #         messages=[{"role": "system", "content": system_message}, {"role": "user", "content": conversation_llm_format}],
    #         # messages=[{"role": "system", "content": "You are a helpful assistant"}, {"role": "user", "content": "Hello"}],
    #         temperature=0.1,
    #         top_p=0.1
    #     )
    #     print("the response is-->")
    #     print(response)
    #     response_text = response.choices[0].message.content
    #     print("the response_text is-->")
    #     print(response_text)

    # this piece does not need to be uncommented out on 112624
    # response_text = " ".join(block.text for block in response.content if getattr(block, "type", None) == "text")
    # response_text = response_text.replace("WZZK", "W Zee Zee Kay")
    # response_text = get_sambanova_response(prompt)

    # added by al on 112624 for the gradio client output
    response = client_state.gc.predict(
        messages=[],
        # messages=[{"role": "system", "content": system_message}, {"role": "user", "content": conversation_llm_format}],
        user_message=prompt,
        api_name="/api_get_response_on_enter"
    )

    print("gradio client response -->")
    print(response)

    # The endpoint returns the updated chat history; the latest assistant turn
    # sits at [0][0][1].
    assistant_response = response[0][0][1]

    print("assistant response -->")
    print(assistant_response)

    # Sanitize and clean the response
    # assistant_response = clean_response(assistant_response, prompt)
    response_text = assistant_response

    # Update conversation histories
    conversation_llm_format.append({"role": "assistant", "content": response_text})
    chatbot.append({"role": "user", "content": prompt})
    chatbot.append({"role": "assistant", "content": response_text})
    # For older Gradio chatbots, the tuple format would be:
    # chatbot.append((prompt, response_text))

    # Yield the chat update once; a second identical yield here duplicated it.
    yield AdditionalOutputs(conversation_llm_format, chatbot)

    # This version commented out on 112724, though it works with Samba:
    # conversation_llm_format.append({"role": "assistant", "content": response_text})
    # chatbot.append({"role": "user", "content": prompt})
    # chatbot.append({"role": "assistant", "content": response_text})
    # yield AdditionalOutputs(conversation_llm_format, chatbot)

    # This version works for Play HT:
    # iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0")

    # voice_engine: The voice engine to use for the TTS request.
    #   Play3.0-mini-http (default): latest multilingual model, streaming audio over HTTP. (NOTE that it is Play, not PlayHT like previous voice engines)
    #   Play3.0-mini-ws: latest multilingual model, streaming audio over WebSockets. (NOTE that it is Play, not PlayHT like previous voice engines)
    #   PlayHT2.0-turbo: legacy English-only model, streaming audio over gRPC.
    iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0-mini-http")

    # aggregate_chunks already yields (1, N) int16 arrays
    for audio_array in aggregate_chunks(iterator):
        # yield (24000, audio_array, "mono")
        yield (48000, audio_array, "mono")

    # this version for Eleven Labs
    # yield client_state.eleven.text_to_speech.convert_as_stream(
    #     voice_id="pMsXgVXv3BLzUgSXRplE",
    #     optimize_streaming_latency="0",
    #     output_format="mp3_22050_32",
    #     text=response_text,
    #     voice_settings=VoiceSettings(
    #         stability=0.1,
    #         similarity_boost=0.3,
    #         style=0.2,
    #     ),
    # )

##-------------- Added by AL based on feedback from Claude -----------

with gr.Blocks() as demo:
    with gr.Group():
        with gr.Row():
            chatbot = gr.Chatbot(label="Conversation", type="messages")
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                with gr.Row():
                    set_key_button = gr.Button("Set Radar", variant="primary")
            with gr.Column(scale=5):
                audio = WebRTC(modality="audio", mode="send-receive",
                               label="Audio Stream",
                               rtc_configuration=rtc_configuration)

    client_state = gr.State(None)
    conversation_llm_format = gr.State([])
    set_key_button.click(set_api_key, inputs=[],
                         outputs=[client_state, set_key_button])
    audio.stream(
        ReplyOnPause(response),
        inputs=[audio, conversation_llm_format, chatbot, client_state],
        outputs=[audio]
    )
    audio.on_additional_outputs(lambda l, g: (l, g), outputs=[conversation_llm_format, chatbot])


if __name__ == "__main__":
    demo.launch()