YingxuHe committed
Commit aba3925 · 1 Parent(s): aea1886
src/content/agent.py CHANGED
@@ -131,6 +131,54 @@ def bottom_input_section():
     st.session_state.new_prompt = chat_input
 
 
+def _prepare_final_prompt_with_ui(one_time_prompt):
+    relevant_query_indices = retrieve_relevant_docs(one_time_prompt, STANDARD_QUERIES)
+    if len(st.session_state.ag_messages) <= 2:
+        relevant_query_indices.append(0)
+
+    relevant_query_indices = list(
+        set(relevant_query_indices).difference(st.session_state.ag_visited_query_indices)
+    )
+
+    st.session_state.ag_visited_query_indices.extend(relevant_query_indices)
+
+    if not relevant_query_indices:
+        return LLM_PROMPT_TEMPLATE.format(
+            user_question=one_time_prompt,
+            audio_information_prompt=""
+        )
+
+    audio_info = []
+    with st.status("Thought process...", expanded=True) as status:
+        for idx in relevant_query_indices:
+            error_msg, warnings, response = retrive_response_with_ui(
+                model_name=MODEL_NAMES["with_lora"]["vllm_name"],
+                prompt=STANDARD_QUERIES[idx]["query_text"],
+                array_audio=st.session_state.ag_audio_array,
+                base64_audio=st.session_state.ag_audio_base64,
+                prefix=f"**{STANDARD_QUERIES[idx]['ui_text']}** :speech_balloon: : ",
+                stream=True
+            )
+            audio_info.append(STANDARD_QUERIES[idx]["response_prefix_text"] + response)
+
+            st.session_state.ag_messages[-1]["process"].append({
+                "error": error_msg,
+                "warnings": warnings,
+                "content": response
+            })
+
+        status.update(state="complete")
+
+    audio_information_prompt = AUDIO_INFO_TEMPLATE.format(
+        audio_information="\n".join(audio_info)
+    )
+
+    return LLM_PROMPT_TEMPLATE.format(
+        user_question=one_time_prompt,
+        audio_information_prompt=audio_information_prompt
+    )
+
+
 def conversation_section():
     chat_message_container = st.container(height=480)
     if st.session_state.ag_audio_array.size:
@@ -170,49 +218,12 @@ def conversation_section():
         with chat_message_container.chat_message("assistant"):
             assistant_message = {"role": "assistant", "process": []}
             st.session_state.ag_messages.append(assistant_message)
-
-            relevant_query_indices = retrieve_relevant_docs(one_time_prompt, STANDARD_QUERIES)
-            if len(st.session_state.ag_messages) <= 2:
-                relevant_query_indices.append(0)
-
-            relevant_query_indices = list(set(relevant_query_indices).difference(st.session_state.ag_visited_query_indices))
-
-            audio_info = []
-            if relevant_query_indices:
-                with st.status("Thought process...", expanded=True) as status:
-                    for idx in relevant_query_indices:
-                        error_msg, warnings, response = retrive_response_with_ui(
-                            model_name=MODEL_NAMES["with_lora"]["vllm_name"],
-                            prompt=STANDARD_QUERIES[idx]["query_text"],
-                            array_audio=st.session_state.ag_audio_array,
-                            base64_audio=st.session_state.ag_audio_base64,
-                            prefix=f"**{STANDARD_QUERIES[idx]['ui_text']}** :speech_balloon: : ",
-                            stream=True
-                        )
-                        audio_info.append(STANDARD_QUERIES[idx]["response_prefix_text"] + response)
-
-                        assistant_message["process"].append({
-                            "error": error_msg,
-                            "warnings": warnings,
-                            "content": response
-                        })
-
-                    status.update(state="complete")
-
-            audio_information_prompt = ""
-            if audio_info:
-                audio_information_prompt = AUDIO_INFO_TEMPLATE.format(
-                    audio_information="\n".join(audio_info)
-                )
-
-            prompt = LLM_PROMPT_TEMPLATE.format(
-                user_question=one_time_prompt,
-                audio_information_prompt=audio_information_prompt
-            )
+
+            final_prompt = _prepare_final_prompt_with_ui(one_time_prompt)
 
             error_msg, warnings, response = retrive_response_with_ui(
                 model_name=MODEL_NAMES["wo_lora"]["vllm_name"],
-                prompt=prompt,
+                prompt=final_prompt,
                 array_audio=st.session_state.ag_audio_array,
                 base64_audio="",
                 stream=True,
@@ -221,7 +232,7 @@ def conversation_section():
 
             assistant_message.update({"error": error_msg, "warnings": warnings, "content": response})
             st.session_state.ag_model_messages.extend([
-                {"role": "user", "content": prompt},
+                {"role": "user", "content": final_prompt},
                 {"role": "assistant", "content": response}
             ])
 
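Note on the refactor above: `_prepare_final_prompt_with_ui` moves the prompt assembly out of `conversation_section` without changing what it produces. The actual `LLM_PROMPT_TEMPLATE` and `AUDIO_INFO_TEMPLATE` strings are defined elsewhere in the repository and are not part of this diff, so the sketch below uses hypothetical stand-in templates purely to show how the `.format()` plumbing composes the final prompt.

```python
# Hypothetical stand-ins for the real templates (defined elsewhere in the repo);
# only the .format() plumbing mirrors what _prepare_final_prompt_with_ui returns.
AUDIO_INFO_TEMPLATE = "Here is some information about the audio clip:\n{audio_information}\n\n"
LLM_PROMPT_TEMPLATE = "{audio_information_prompt}Please answer the user's question: {user_question}"


def compose_final_prompt(user_question, audio_info):
    """Fold the per-query findings (if any) into one prompt for the wo_lora model."""
    audio_information_prompt = ""
    if audio_info:
        audio_information_prompt = AUDIO_INFO_TEMPLATE.format(
            audio_information="\n".join(audio_info)
        )
    return LLM_PROMPT_TEMPLATE.format(
        user_question=user_question,
        audio_information_prompt=audio_information_prompt,
    )


print(compose_final_prompt(
    "What are the speakers talking about?",
    ["By analyzing pitch, formants, harmonics, and prosody features ...: likely one female speaker."],
))
```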
src/content/common.py CHANGED
@@ -317,19 +317,19 @@ STANDARD_QUERIES = [
     },
     {
         "query_text": "May I know the gender of the speakers",
-        "doc_text": "Please identify speaker gender by analyzing pitch, formants, harmonics, and prosody features, which reflect physiological and speech pattern differences between genders.",
+        "doc_text": "Please identify the gender of the speaker. For instance, whether is the speaker male or female.",
         "response_prefix_text": "By analyzing pitch, formants, harmonics, and prosody features, which reflect physiological and speech pattern differences between genders: ",
         "ui_text": "gender recognition"
     },
     {
         "query_text": "May I know the nationality of the speakers",
-        "doc_text": "Discover speakers' nationality, country, or the place he is coming from. Analyze speakers' accent, pronunciation patterns, intonation, rhythm, phoneme usage, and language-specific speech features influenced by cultural and linguistic backgrounds.",
+        "doc_text": "Discover speakers' nationality, country, or the place he is coming from, from his/her accent, pronunciation patterns, and other language-specific speech features influenced by cultural and linguistic backgrounds.",
         "response_prefix_text": "By analyzing accent, pronunciation patterns, intonation, rhythm, phoneme usage, and language-specific speech features influenced by cultural and linguistic backgrounds: ",
         "ui_text": "accent recognition"
     },
     {
         "query_text": "Can you guess which ethnic group this person is from based on their accent.",
-        "doc_text": "Discover speakers' ethnic group, home country, or the place he is coming from, from speech features like accent, tone, intonation, phoneme variations, and vocal characteristics influenced by cultural, regional, and linguistic factors.",
+        "doc_text": "Discover speakers' ethnic group, home country, or the place he is coming from, from his/her accent, tone, and other vocal characteristics influenced by cultural, regional, and linguistic factors.",
         "response_prefix_text": "By analyzing speech features like accent, tone, intonation, phoneme variations, and vocal characteristics influenced by cultural, regional, and linguistic factors: ",
         "ui_text": "accent recognition"
     },
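For readers skimming the `doc_text` edits above: each `STANDARD_QUERIES` entry plays a different role in `src/content/agent.py` and `src/retrieval.py`. `doc_text` is what `retrieve_relevant_docs` scores against the user question, `query_text` is what is actually sent to the `with_lora` model, `response_prefix_text` is prepended to that model's answer before it is folded into the final prompt, and `ui_text` labels the step inside the "Thought process..." status box. A shortened illustration (the model answer below is hypothetical):

```python
# Shortened illustration of one STANDARD_QUERIES entry and where each field is used.
entry = {
    "query_text": "May I know the gender of the speakers",          # sent to the with_lora model
    "doc_text": "Please identify the gender of the speaker. ...",   # scored against the user question
    "response_prefix_text": "By analyzing pitch, formants, ...: ",  # prepended to the model's answer
    "ui_text": "gender recognition",                                # labels the st.status step
}

model_answer = "the speaker is most likely female"  # hypothetical with_lora output
audio_info_line = entry["response_prefix_text"] + model_answer
print(audio_info_line)  # "By analyzing pitch, formants, ...: the speaker is most likely female"
```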
src/retrieval.py CHANGED
@@ -15,6 +15,6 @@ def load_retriever():
 def retrieve_relevant_docs(user_question, docs: List[Dict]) -> List[int]:
     scores = st.session_state.retriever.compute_score([[user_question, d["doc_text"]] for d in docs], normalize=True)
     normalized_scores = np.array(scores) / np.sum(scores)
-
-    selected_indices = np.where((np.array(scores) > 0.02) & (normalized_scores > 0.3))[0]
+
+    selected_indices = np.where((np.array(scores) > 0.2) & (normalized_scores > 0.3))[0]
     return selected_indices.tolist()
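Raising the absolute score threshold from 0.02 to 0.2 adds a meaningful floor on top of the existing relative cut-off (`normalized_scores > 0.3`): a standard query that merely dominates a set of uniformly weak scores is no longer selected. A standalone sketch with made-up scores, assuming the same selection rule as above:

```python
import numpy as np

# Made-up similarity scores for three STANDARD_QUERIES candidates.
scores = np.array([0.10, 0.01, 0.01])
normalized_scores = scores / scores.sum()   # -> [0.833, 0.083, 0.083]

# Old rule: a dominant match passes even when its absolute score is tiny.
old = np.where((scores > 0.02) & (normalized_scores > 0.3))[0]
# New rule: the match must also clear the 0.2 absolute floor.
new = np.where((scores > 0.2) & (normalized_scores > 0.3))[0]

print(old.tolist())  # [0] -> selected under the old 0.02 threshold
print(new.tolist())  # []  -> filtered out under the new 0.2 threshold
```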
style/app_style.css CHANGED
@@ -1,6 +1,19 @@
 div[data-testid="stMainBlockContainer"] {
     padding-top: 2rem;
     padding-bottom: 1rem;
+    height: 100%;
+}
+
+div[data-testid="stMainBlockContainer"]>div[data-testid="stVerticalBlockBorderWrapper"] {
+    height: 100%;
+}
+
+div[data-testid="stMainBlockContainer"]>div[data-testid="stVerticalBlockBorderWrapper"]>div {
+    height: 100%;
+}
+
+div[data-testid="stMainBlockContainer"]>div[data-testid="stVerticalBlockBorderWrapper"]>div>div {
+    height: 100%;
 }
 
 div[data-testid="stMainBlockContainer"] div[data-testid="stAudioInput"]>div {
@@ -25,6 +38,11 @@ div[data-testid="stChatMessage"]:has(> div[data-testid="stChatMessageAvatarUser"
     text-align: right;
 }
 
+div[height="480"][data-testid="stVerticalBlockBorderWrapper"] {
+    height: 100%;
+    min-height: 380px;
+}
+
 /* audio quick actions */
 
 div[data-testid="stChatMessage"] div[data-testid="stVerticalBlock"]:has( audio[data-testid="stAudio"]) {
style/normal_window.css CHANGED
@@ -1,7 +1,7 @@
-@media(min-width: 576px) {
+@media(min-width: 800px) {
     div[data-testid="stMainBlockContainer"] {
         padding-left: 5rem;
-        padding-bottom: 5rem;
+        padding-right: 5rem;
     }
 
     div[data-testid="stBottomBlockContainer"] {
style/small_window.css CHANGED
@@ -1,7 +1,7 @@
-@media(max-width: 576px) {
+@media(max-width: 800px) {
     div[data-testid="stMainBlockContainer"] {
         padding-left: 1rem;
-        padding-bottom: 1rem;
+        padding-right: 1rem;
    }
 
     div[data-testid="stMainBlockContainer"] div[data-testid="stVerticalBlock"]>div[data-testid="stElementContainer"]:has( div[data-testid="stHeadingWithActionElements"]) {
@@ -15,10 +15,4 @@
     div[data-testid="stSidebarCollapsedControl"] button[data-testid="stBaseButton-headerNoPadding"]::after {
         content: "More Use Cases"
     }
-}
-
-@media (max-width: 916px) and (max-height: 958px) {
-    div[height="480"][data-testid="stVerticalBlockBorderWrapper"] {
-        height: 380px;
-    }
 }