on1onmangoes committed
Commit acad46a · verified · 1 Parent(s): 24f4f37

Create app.py

Files changed (1)
  1. app.py +768 -0
app.py ADDED
@@ -0,0 +1,768 @@
+ import gradio as gr
+ from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
+ import anthropic
+ from pyht import Client as PyHtClient, TTSOptions
+ import dataclasses
+ import os
+ import numpy as np
+ from huggingface_hub import InferenceClient
+ import io
+ from pydub import AudioSegment
+ from dotenv import load_dotenv
+
+ import sambanova_gradio
+
+ from elevenlabs import ElevenLabs, VoiceSettings
+
+ ## added by AL on 111124 to get SambaNova
+ import openai
+
+ ## added by AL on 111424 to get Pinecone
+ from pinecone import Pinecone
+
+ ## To get the semantic piece
+ from openai import OpenAI
+
+ # ADDED BY AL ON 111824 TO GET PYPDF FOR ANNIE
+ import requests
+ import json
+ import PyPDF2
+
+ # added by al on 120224 to clean the response.
+ import re
+
+ # added by al 120224 to improve speed of responses.
+ import random
+ import torch
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+
+ # needed by auto_reset_state and transcribe_function below
+ import time
+ import threading
+
+ # ADDED BY AL ON 112624 TO GET GRADIO CLIENT FOR STREAMING CHATBOT
+ from gradio_client import Client as gcClient
+ from huggingface_hub import login
+
+ # client = gcClient("BACKENDAPI2024/radarbackend11262024v11")
+ # result = client.predict(
+ #     messages=[],
+ #     user_message="Hello!!",
+ #     api_name="/api_get_response_on_enter"
+ # )
+ # print(result)
+
+ import whisper
+
+
+ load_dotenv()
+
+ account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
+ auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
+
+ # GET API KEYS Added by AL on 111124
+ SAMBA_NOVA_API_KEY = os.environ.get("SAMBA_NOVA_API_KEY", None)
+ ELEVEN_LABS_API_KEY = os.environ.get("ELEVEN_LABS_API_KEY", None)
+ ELEVEN_DEFAULT_VOICE_ID = "ogvfya0XETMq7tFy4TO2"  # Replace with your desired voice ID
+
+ CLAUDE_CLIENT_API_KEY = os.environ.get("ANTHROPIC_KEY", None)
+ PLAYHT_SECRET_KEY = os.environ.get("PLAYHT_SECRET_KEY", None)
+ PLAYHT_USER_ID = os.environ.get("PLAYHT_USER_ID", None)
+
+ PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", None)
+
+ # GET API KEYS Added by AL on 111524
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", None)
+
+ # wip added by al to make the OpenAI embedding work
+ openai_client2 = OpenAI(
+     api_key=OPENAI_API_KEY,
+ )
+
+ # set pinecone index name
+ index_name = "radardataclean11122024"
+ # index = pc.Index(host="INDEX_HOST")
+
+ # added by Al on 111424
+ pc = Pinecone(api_key=PINECONE_API_KEY)
+ index = pc.Index(index_name)
+
+
+ # Added by al on 112624 to get the hf token for the gradio client
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ if HF_TOKEN is None:
+     print("Please set your Hugging Face token in the environment variables.")
+ else:
+     login(token=HF_TOKEN)
+
+ # debug: these print secrets to the logs
+ print(CLAUDE_CLIENT_API_KEY)
+ print(PLAYHT_SECRET_KEY)
+ print(PLAYHT_USER_ID)
+ print(ELEVEN_LABS_API_KEY)
+ print(PINECONE_API_KEY)
+ print(OPENAI_API_KEY)
+ print(HF_TOKEN)
+
+ # Whisper Speech-to-Text
+ model_id = 'openai/whisper-large-v3'
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype).to(device)
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ pipe_asr = pipeline(
+     "automatic-speech-recognition",
+     model=model,
+     tokenizer=processor.tokenizer,
+     feature_extractor=processor.feature_extractor,
+     max_new_tokens=128,
+     chunk_length_s=15,
+     batch_size=16,
+     torch_dtype=torch_dtype,
+     device=device,
+     return_timestamps=True
+ )
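+
+ # A minimal sketch of calling pipe_asr directly on a float32 mono buffer
+ # (illustrative only; the `samples` value and the 16 kHz rate are assumptions,
+ # not app data):
+ #
+ #     samples = np.zeros(16000, dtype=np.float32)  # one second of silence
+ #     text = pipe_asr({"array": samples, "sampling_rate": 16000})["text"]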
+
+ def auto_reset_state():
+     time.sleep(5)
+     return None, ""
+
+ def transcribe_function(stream, new_chunk):
+     try:
+         sr, y = new_chunk[0], new_chunk[1]
+     except TypeError:
+         print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
+         return stream, "", None
+
+     if y is None or len(y) == 0:
+         return stream, "", None
+
+     y = y.astype(np.float32)
+     max_abs_y = np.max(np.abs(y))
+     if max_abs_y > 0:
+         y = y / max_abs_y
+
+     if stream is not None and len(stream) > 0:
+         stream = np.concatenate([stream, y])
+     else:
+         stream = y
+
+     result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+     full_text = result.get("text", "")
+
+     threading.Thread(target=auto_reset_state).start()
+
+     return stream, full_text, full_text
+
+ def clear_transcription_state():
+     return None, ""
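+
+ # A sketch of how these helpers could be wired to a streaming gr.Audio input
+ # (not used by this app, which streams over WebRTC instead; the component
+ # names and layout here are assumptions):
+ #
+ #     mic = gr.Audio(sources=["microphone"], streaming=True)
+ #     state = gr.State(None)
+ #     live_text = gr.Textbox(label="Transcript")
+ #     last_text = gr.Textbox(label="Last utterance")
+ #     mic.stream(transcribe_function, [state, mic], [state, live_text, last_text])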
+
+
+ if account_sid and auth_token:
+     from twilio.rest import Client
+     client = Client(account_sid, auth_token)
+
+     token = client.tokens.create()
+
+     rtc_configuration = {
+         "iceServers": token.ice_servers,
+         "iceTransportPolicy": "relay",
+     }
+ else:
+     rtc_configuration = None
+
+
+ @dataclasses.dataclass
+ class Clients:
+     claude: anthropic.Anthropic
+     play_ht: PyHtClient
+     hf: InferenceClient
+     eleven: ElevenLabs
+     sambanova: openai.OpenAI
+     pc: Pinecone
+     openai: OpenAI
+     gc: gcClient
+     grState: gr.State
+     # whisper: whisper
+
+
+ ## added by al for annie voice on 111124
+ # tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
+ #                          sample_rate=24000)
+
+ tts_options = TTSOptions(voice="s3://voice-cloning-zero-shot/544e26e9-64b4-4243-aed7-8f8891212ada/original/manifest.json",
+                          sample_rate=48000)
+
+ # From the Play HT APIs: https://docs.play.ht/reference/api-list-cloned-voices
+
+
+ def aggregate_chunks(chunks_iterator):
+     leftover = b''  # Store incomplete bytes between chunks
+
+     for chunk in chunks_iterator:
+         # Combine with any leftover bytes from the previous chunk
+         current_bytes = leftover + chunk
+
+         # Calculate complete samples
+         n_complete_samples = len(current_bytes) // 2  # int16 = 2 bytes
+         bytes_to_process = n_complete_samples * 2
+
+         # Split into complete samples and leftover
+         to_process = current_bytes[:bytes_to_process]
+         leftover = current_bytes[bytes_to_process:]
+
+         if to_process:  # Only yield if we have complete samples
+             audio_array = np.frombuffer(to_process, dtype=np.int16).reshape(1, -1)
+             yield audio_array
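+
+ # Worked example of the byte math above (values are illustrative): if one
+ # chunk carries 7 bytes, only 3 complete int16 samples (6 bytes) are yielded
+ # and the 7th byte is held in `leftover` until the next chunk arrives, so no
+ # sample is ever split across a chunk boundary:
+ #
+ #     chunks = [b'\x00\x01\x02\x03\x04\x05\x06', b'\x07']
+ #     shapes = [a.shape for a in aggregate_chunks(iter(chunks))]
+ #     # shapes == [(1, 3), (1, 1)]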
+
+
+ def audio_to_bytes(audio: tuple[int, np.ndarray]) -> bytes:
+     audio_buffer = io.BytesIO()
+     segment = AudioSegment(
+         audio[1].tobytes(),
+         frame_rate=audio[0],
+         sample_width=audio[1].dtype.itemsize,
+         channels=1,
+     )
+     segment.export(audio_buffer, format="mp3")
+     return audio_buffer.getvalue()
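+
+ # A minimal usage sketch (the sample values are assumptions, not app data):
+ #
+ #     sr, samples = 48000, np.zeros(48000, dtype=np.int16)
+ #     mp3_bytes = audio_to_bytes((sr, samples))  # mono mp3, ready for an ASR API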
+
+
+ # AL MIGHT NOT BE USING THIS APPROACH ANYMORE.
+ def semantic_search(query):
+     # Generate embeddings for the query using OpenAI
+     # response = client_state.openai.Embedding.create(
+     # response = client_state.openai.embeddings.create(
+     response = openai_client2.embeddings.create(
+         input=query,
+         model="text-embedding-ada-002"  # ChatGPT-4 does not provide embeddings directly
+     )
+     # the v1 OpenAI client returns an object, not a dict
+     query_embedding = response.data[0].embedding
+
+     # Search the Pinecone index (current clients take `vector=`, not `queries=`)
+     result = index.query(vector=query_embedding, top_k=100, include_metadata=True)
+
+     # Format the results as a string
+     results_str = ""
+     for match in result.matches:
+         metadata = match.metadata
+         score = match.score
+         results_str += f"ID: {match.id}, Score: {score}, Metadata: {metadata}\n"
+
+     return results_str
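+
+ # Illustrative call (the query string is made up):
+ #
+ #     context_str = semantic_search("upcoming country concerts in Birmingham")
+ #     # returns lines like "ID: ..., Score: 0.87, Metadata: {...}"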
+
+
+ ## Updated version without the key selections
+ def set_api_key():
+     try:
+         claude_client = anthropic.Anthropic(api_key=CLAUDE_CLIENT_API_KEY)
+         play_ht_client = PyHtClient(user_id=PLAYHT_USER_ID, api_key=PLAYHT_SECRET_KEY)
+         # added by al on 111124 (pass the key variable, not the literal string)
+         eleven_client = ElevenLabs(api_key=ELEVEN_LABS_API_KEY)
+         sambanova_client = openai.OpenAI(
+             api_key=SAMBA_NOVA_API_KEY,
+             base_url="https://api.sambanova.ai/v1",
+         )
+         # added by al on 111424 for Pinecone
+         pc_client = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
+
+         # added by al on 111524 for openai embeddings
+         openai_client = OpenAI(
+             api_key=OPENAI_API_KEY,
+         )
+         # added by al on 112524 for gradio client for RAG
+         # gradio_c = gcClient("BACKENDAPI2024/radarbackend11262024v11", hf_token=HF_TOKEN)
+         gradio_c = gcClient("on1onmangoes/radarheyanniebackend112724", hf_token=HF_TOKEN)
+         gradio_state = gr.State()
+         # whisper_model = whisper.load_model("turbo")
+     except Exception:
+         raise gr.Error("Invalid API keys. Please try again.")
+     gr.Info("Successfully set Radar.", duration=3)
+     return Clients(claude=claude_client, play_ht=play_ht_client,
+                    hf=InferenceClient(), eleven=eleven_client, sambanova=sambanova_client,
+                    pc=pc_client, openai=openai_client, gc=gradio_c, grState=gradio_state), gr.skip()
+
+
+ #------------------------------------- END OF ANTHROPIC STYLE
+
+ #--- Added by AL on 111124 for SambaNova optimizations; commented out on 111724 to use the new prompt with context as well below
+ # system_message = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences. Today is November 18 2024, use this date for reference.
+ # Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
+ # Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor—after all,
+ # laughter is the best app! Keep your responses brief and to the point, avoiding unnecessary details or tangents, unless they're hilariously relevant.
+ # Use a friendly and approachable tone to create a pleasant interaction, and don't shy away from a cheeky pun or two!
+ # Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
+ # Keep the responses short and sweet, yet informational and insightful, no longer than 3 sentences. Provide addresses and dates.
+ # Invite users to ask follow-up questions or clarify their needs, fostering a conversational flow that's as smooth as butter on a hot pancake. Avoid repeating the same greeting.
+ # Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
+ # Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""
+ # client_prompt = system_message
+
+ # client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences. Today is November 18 2024, use this date for reference.
+ # Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
+ # Always aim to provide clear and relevant information that directly addresses the user's query. First check for our preferred vendors in the system prompt.
+ # Use a friendly and approachable tone to create a pleasant interaction, and don't shy away from a cheeky pun or two!
+ # Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
+ # If no information is found, request additional information; don't give negative responses or mention your limitations, so that this can be used for downstream tasks.
+ # Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor—after all, laughter is the best app!
+ # Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
+ # Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""
+
+ # client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences using simple short sentences.
+ # DO NOT PRODUCE UNNECESSARY PUNCTUATION AS THIS TEXT WILL BE FED TO A TEXT-TO-VOICE SYSTEM. KEEP IT SIMPLE. Today is December 2 2024, use this date for reference.
+ # Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
+ # First check for our preferred vendors in the context.
+ # Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
+ # If no information is found, request additional information; don't give negative responses or mention your limitations, so that this can be used for downstream tasks.
+ # Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor—after all, laughter is the best app!
+ # Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
+ # Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""
+
+ client_prompt = """You are Annie, a friendly and intelligent voice assistant who only replies in a couple of sentences using simple short sentences. NO MORE THAN 2 SENTENCES.
+ DO NOT PRODUCE UNNECESSARY PUNCTUATION AS THIS TEXT WILL BE FED TO A TEXT-TO-VOICE SYSTEM. KEEP IT SIMPLE, SHORT AND SWEET. Today is December 2 2024, use this date for reference.
+ Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone about New Country Music, Alabama and WZZK 104.7.
+ First check for our preferred vendors in the context. FOR RADIO AND MUSIC QUESTIONS REFER TO WZZK 104.7 FOR THE MOST PART.
+ Tailor your responses based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
+ If no information is found, request additional information; don't give negative responses or mention your limitations, so that this can be used for downstream tasks.
+ Always aim to provide clear and relevant information that directly addresses the user's query, but feel free to sprinkle in a dash of humor—after all, laughter is the best app!
+ Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
+ Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal!"""
+
+ ## ADDED BY AL ON 120224 TO CONVEY THE SYSTEMIC RESPONSE.
+ # Predefined lists for random voicebot responses
+ processing_responses = [
+     "Hang tight, I'm working on that for you!",
+     "Give me a sec, I'll have it ready in no time!",
+     "Just a moment, let me check that for you.",
+     "Working on it, sit tight!",
+     "Got it! Let me pull that up for you.",
+     "On it! This won't take long.",
+     "Hold on, I'm finding that for you.",
+     "Let me grab that info for you real quick.",
+     "One sec, I'm putting it all together!",
+     "I'm on it! This should just take a moment."
+ ]
+
+ greeting_responses = [
+     "Hey there! Great to hear from you!",
+     "Hi! How's it going?",
+     "Hello! What's on your mind today?",
+     "Hey! What can I help you with?",
+     "Hi there! Always good to chat with you."
+ ]
+
+
+ ## -------------------------- Added by AL on 111724 to get the country music synopsis and client synopsis
+
+ # from datasets import load_dataset
+
+ # client_dataset_name = "on1onmangoes/SAMLONEv4_20241001145542"
+
+ # # Load the dataset
+ # dataset = load_dataset(client_dataset_name)
+
+ # # Initialize the context string
+ # context = ''
+
+ # # Assuming the dataset has a 'train' split;
+ # # adjust if there are other splits like 'validation' or 'test'
+ # data_split = dataset['train']
+
+ # # Inspect the column names to adjust the field names accordingly
+ # print("Column names:", data_split.column_names)
+
+ # These are the field names for the client data here:
+ #   Name         string
+ #   Category     string
+ #   Address      string
+ #   Phone        string
+ #   Description  string
+
+ # # Build the context string
+ # for example in data_split:
+ #     # Replace 'Title', 'Source', etc., with the actual field names from your dataset
+ #     name = example.get('Name', '')
+ #     category = example.get('Category', '')
+ #     address = example.get('Address', '')
+ #     phone = example.get('Phone', '')
+ #     description = example.get('Description', '')
+
+ #     # Concatenate the fields into the context string
+ #     context += f"Name: {name}\n"
+ #     context += f"Category: {category}\n"
+ #     context += f"Address: {address}\n"
+ #     context += f"Phone: {phone}\n"
+ #     context += f"Description: {description}\n\n"
+
+ # # Optionally, print a portion of the context to verify
+ # print(context[:1000])  # Print the first 1000 characters
+ ## ---------------------------------------------------------------------------------------------------------------
+
+
+ # Added by Al on 111724 to add the client prompt
+ system_message = client_prompt
+ # system_message += "\n\n" + context
+
+
+ #------------- For PDF reading added by AL on 111824 (PyPDF2 is imported above)
+
+ def pdf_to_text(pdf_location):
+     # Check if the location is a URL or a file path
+     if pdf_location.startswith('http://') or pdf_location.startswith('https://'):
+         # Fetch the PDF from the URL
+         try:
+             response = requests.get(pdf_location)
+             response.raise_for_status()
+             pdf_bytes = io.BytesIO(response.content)
+         except requests.exceptions.RequestException as e:
+             return f"Error fetching the PDF file from the URL: {e}", None
+     else:
+         # Check if the file exists at the given path
+         if not os.path.exists(pdf_location):
+             return "The file does not exist at the specified location.", None
+         # Open the PDF file
+         try:
+             pdf_bytes = open(pdf_location, 'rb')
+         except Exception as e:
+             return f"Error opening the PDF file: {e}", None
+
+     # Read the PDF file
+     try:
+         reader = PyPDF2.PdfReader(pdf_bytes)
+         text = ""
+         for page in reader.pages:
+             page_text = page.extract_text()
+             if page_text:
+                 text += page_text + "\n"
+         # Convert the text to JSON format
+         text_json = json.dumps({"text": text})
+         return text, text_json
+     except Exception as e:
+         return f"An error occurred while reading the PDF: {e}", None
+     finally:
+         # Close the handle (harmless for BytesIO, required for local files)
+         pdf_bytes.close()
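+
+ # Usage sketch with hypothetical paths (the file names below are made up):
+ #
+ #     text, text_json = pdf_to_text("https://example.com/somefile.pdf")
+ #     text, text_json = pdf_to_text("./content/localfile.pdf")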
+
+ # FILE IS TOO BIG
+ # content_file_path = "./content/ANNIE111824.pdf"
+
+ # Causes hallucinations
+ # content_file_path_short = "ANNIE30TO57SHORT111824.pdf"
+
+ # content_file_path_clientartists = "./content/ANNIECLIENTSARTISTS111824.pdf"
+ # content, content_json = pdf_to_text(content_file_path_clientartists)
+
+ # ANNIECLIENTARTISTSUPERSHORT111824.pdf
+ content_file_supershort = "./content/ANNIECLIENTARTISTSUPERSHORT111824.pdf"
+ content, content_json = pdf_to_text(content_file_supershort)
+
+ print("Annie Content is -->")
+ print(content)
+
+
+ #------------------
+ # added by al on 112724 to clean the response from the gradio client api
+ def clean_response(response, user_message):
+     """
+     Cleans the response text by removing unwanted symbols and formatting issues,
+     and ensures the response does not repeat the question.
+     """
+     if isinstance(response, (list, tuple)):  # Handle nested lists/tuples
+         response = " ".join(map(str, response))
+
+     # Remove backslashes, newline characters, and specified unwanted symbols
+     response = re.sub(r"[\\\n\(\)\[\]\"']", " ", response)
+
+     # Normalize punctuation spacing
+     response = re.sub(r"\s([?.!,'](?:\s|$))", r"\1", response)
+
+     # Remove question repetition from the response
+     if response.lower().startswith(user_message.lower().strip()):
+         response = response[len(user_message):].strip(",. ")
+
+     # Replace multiple spaces with a single space
+     response = re.sub(r"\s+", " ", response).strip()
+
+     return response
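+
+ # Illustrative before/after (made-up strings): the symbols are stripped, the
+ # echoed question prefix is dropped, and whitespace is collapsed:
+ #
+ #     clean_response("Hello Annie,\ngreat to (finally) meet you", "Hello Annie")
+ #     # -> 'great to finally meet you'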
+
+
+ # Added by Al on 111824 to add the content
+ # system_message += "\n\n" + content
+
+ print("the system message is -->")
+ print(system_message)
+
+
+ ## Added by al on 12 02 24 to do the transcription locally asap
+
+
+ # New method: uses the system message to summarize the client history upfront
+ def response(audio: tuple[int, np.ndarray], conversation_llm_format: list[dict],
+              chatbot: list[dict], client_state: Clients):
+     if not client_state:
+         raise gr.Error("Please set your API keys first.")
+
+     # THIS IS THE ORIGINAL PROMPT; UPDATED FOR BETTER RESULTS WITH CLAUDE HAIKU
+     prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio)).text
+
+     # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="distil-whisper/distil-large-v3").text
+     # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="openai/whisper-large-v3-turbo").text
+     # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="nyrahealth/CrisperWhisper").text
+     # prompt = "Hey Annie how are you"
+     # prompt = transcribe_function(client_state.grState, audio)
+     # prompt = client_state.hf.automatic_speech_recognition(audio_to_bytes(audio), model="https://kttcvz41e0htmjpp.us-east-1.aws.endpoints.huggingface.cloud").text
+
+     # ADDED BY AL TO USE THE SPEECH-TO-TEXT GRADIO CLIENT - not working
+     # client = gcClient("on1onmangoes/radarheyanniebackend112724")
+     # result = client.predict(
+     #     new_chunk=handle_file('https://github.com/gradio-app/gradio/raw/main/test/test_files/audio_sample.wav'),
+     #     api_name="/api_voice_to_text"
+     # )
+     # print(result)
+     # prompt = client_state.gc.predict(
+     #     new_chunk=audio,
+     #     api_name="/api_voice_to_text"
+     # ).text
+
+     print("the prompt is-->")
+     print(prompt)
+
+     # added by al on 111524
+     query = prompt  # Use the transcribed text as the query for semantic search
+     # added by al on 111724 to add the context from the dataset directly to the query
+     # prompt += "\n\n" + semantic_search(query)
+
+     conversation_llm_format.append({"role": "user", "content": prompt})
+
+     # added by al on 12 02 24 to remove dead space:
+     # determine whether the prompt is a greeting or requires processing
+     if any(greeting in prompt.lower() for greeting in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]):
+         # Random greeting response for greeting-only prompts
+         processing_message = random.choice(greeting_responses)
+     else:
+         # Random processing message for general prompts
+         processing_message = random.choice(processing_responses)
+
+     print("processing message -->", processing_message)
+
+     # Generate audio for the processing response and stream it immediately
+     processing_audio_iterator = client_state.play_ht.tts(
+         processing_message, options=tts_options, voice_engine="Play3.0-mini-http"
+     )
+     for audio_array in aggregate_chunks(processing_audio_iterator):
+         # aggregate_chunks already yields int16 arrays shaped (1, -1)
+         yield (48000, audio_array, "mono")
+
+     # if llm == "claude_haiku":
+     #     response = client_state.claude.messages.create(
+     #         model="claude-3-5-haiku-20241022",
+     #         max_tokens=512,
+     #         system="You are Annie, a friendly and intelligent voice assistant specializing in New Country Music, Alabama, and WZZK 104.7. Your responses should be brief, engaging, and informative while maintaining a positive and upbeat tone.",
+     #         messages=conversation_llm_format,
+     #     )
+     #     response_text = " ".join(block.text for block in response.content if getattr(block, "type", None) == "text")
+
+     # elif llm == "meta8b_samba":
+     #     response = client_state.sambanova.chat.completions.create(
+     #         model='Meta-Llama-3.1-8B-Instruct',
+     #         # ADDED BY AL ON 111824 TO INCREASE CONTEXT LENGTH
+     #         # model='Meta-Llama-3.1-70B-Instruct',
+     #         # model='Meta-Llama-3.1-405B-Instruct',
+     #         # ADDED BY AL ON 111824 TO REDUCE LATENCY WITH 3.2
+     #         # model='Meta-Llama-3.2-1B-Instruct',
+     #         # model='Meta-Llama-3.2-3B-Instruct',
+     #         # model='Llama-3.2-11B-Vision-Instruct',
+     #         # model='Llama-3.2-90B-Vision-Instruct',
+     #         messages=[{"role": "system", "content": system_message}, {"role": "user", "content": conversation_llm_format}],
+     #         temperature=0.1,
+     #         top_p=0.1
+     #     )
+     #     print("the response is-->")
+     #     print(response)
+     #     response_text = response.choices[0].message.content
+
+     # response_text = response_text.replace("WZZK", "W Zee Zee Kay")
+     # response_text = get_sambanova_response(prompt)
+
+     # added by al on 112624 for the gradio client output
+     response = client_state.gc.predict(
+         messages=[],
+         user_message=prompt,
+         api_name="/api_get_response_on_enter"
+     )
+
+     print("gradio client response -->")
+     print(response)
+
+     assistant_response = response[0][0][1]
+
+     print("assistant response -->")
+     print(assistant_response)
+
+     # Sanitize and clean the response
+     # assistant_response = clean_response(assistant_response, prompt)
+     response_text = assistant_response
+
+     # Update conversation histories
+     conversation_llm_format.append({"role": "assistant", "content": response_text})
+     chatbot.append({"role": "user", "content": prompt})
+     chatbot.append({"role": "assistant", "content": response_text})
+     yield AdditionalOutputs(conversation_llm_format, chatbot)
+
+     # added by al on 112624: earlier tuple-format history for the Gradio chatbot
+     # chatbot.append((prompt, response_text))
+
+     # this version works for Play HT with the older engine name
+     # iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0")
+
+     # voice_engine: The voice engine to use for the TTS request.
+     # Play3.0-mini-http (default): latest multilingual model, streaming audio over HTTP. (NOTE: it is "Play", not "PlayHT" like previous voice engines)
+     # Play3.0-mini-ws: latest multilingual model, streaming audio over WebSockets. (NOTE: it is "Play", not "PlayHT" like previous voice engines)
+     # PlayHT2.0-turbo: legacy English-only model, streaming audio over gRPC.
+     iterator = client_state.play_ht.tts(response_text, options=tts_options, voice_engine="Play3.0-mini-http")
+
+     for audio_array in aggregate_chunks(iterator):
+         # yield (24000, audio_array, "mono")
+         yield (48000, audio_array, "mono")
+
+     # this version for eleven labs
+     # yield client_state.eleven.text_to_speech.convert_as_stream(
+     #     voice_id="pMsXgVXv3BLzUgSXRplE",
+     #     optimize_streaming_latency="0",
+     #     output_format="mp3_22050_32",
+     #     text=response_text,
+     #     voice_settings=VoiceSettings(
+     #         stability=0.1,
+     #         similarity_boost=0.3,
+     #         style=0.2,
+     #     ),
+     # )
+
+
+ ##-------------- Added by AL based on feedback from Claude -----------
+
+ with gr.Blocks() as demo:
+     with gr.Group():
+         with gr.Row():
+             chatbot = gr.Chatbot(label="Conversation", type="messages")
+         with gr.Row(equal_height=True):
+             with gr.Column(scale=1):
+                 with gr.Row():
+                     set_key_button = gr.Button("Set Radar", variant="primary")
+             with gr.Column(scale=5):
+                 audio = WebRTC(modality="audio", mode="send-receive",
+                                label="Audio Stream",
+                                rtc_configuration=rtc_configuration)
+
+     client_state = gr.State(None)
+     conversation_llm_format = gr.State([])
+     set_key_button.click(set_api_key, inputs=[],
+                          outputs=[client_state, set_key_button])
+     audio.stream(
+         ReplyOnPause(response),
+         inputs=[audio, conversation_llm_format, chatbot, client_state],
+         outputs=[audio]
+     )
+     audio.on_additional_outputs(lambda l, g: (l, g), outputs=[conversation_llm_format, chatbot])
+
+
+ if __name__ == "__main__":
+     demo.launch()