Merge branch 'feature/add_graphs_in_separate_pannel' into pr/15
Browse files- app.py +65 -31
- front/utils.py +50 -2
- sandbox/20240310 - CQA - Semantic Routing 1.ipynb +0 -0
- style.css +6 -1
app.py
CHANGED
@@ -32,7 +32,7 @@ from utils import create_user_id
|
|
32 |
# ClimateQ&A imports
|
33 |
from climateqa.engine.llm import get_llm
|
34 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
35 |
-
from climateqa.knowledge.retriever import ClimateQARetriever
|
36 |
from climateqa.engine.reranker import get_reranker
|
37 |
from climateqa.engine.embeddings import get_embeddings_function
|
38 |
from climateqa.engine.chains.prompts import audience_prompts
|
@@ -43,7 +43,7 @@ from climateqa.engine.keywords import make_keywords_chain
|
|
43 |
# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
44 |
from climateqa.engine.graph import make_graph_agent,display_graph
|
45 |
|
46 |
-
from front.utils import make_html_source,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
47 |
|
48 |
# Load environment variables in local mode
|
49 |
try:
|
@@ -133,6 +133,7 @@ async def chat(query,history,audience,sources,reports):
|
|
133 |
output_keywords = ""
|
134 |
gallery = []
|
135 |
start_streaming = False
|
|
|
136 |
|
137 |
steps_display = {
|
138 |
"categorize_intent":("🔄️ Analyzing user message",True),
|
@@ -151,10 +152,12 @@ async def chat(query,history,audience,sources,reports):
|
|
151 |
try:
|
152 |
docs = event["data"]["output"]["documents"]
|
153 |
docs_html = []
|
154 |
-
|
155 |
-
|
|
|
|
|
156 |
|
157 |
-
used_documents = used_documents + [d.metadata[
|
158 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
159 |
|
160 |
docs_html = "".join(docs_html)
|
@@ -184,7 +187,7 @@ async def chat(query,history,audience,sources,reports):
|
|
184 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
185 |
print("X")
|
186 |
|
187 |
-
yield history,docs_html,output_query,output_language,gallery #,output_query,output_keywords
|
188 |
|
189 |
except Exception as e:
|
190 |
print(event, "has failed")
|
@@ -212,12 +215,49 @@ async def chat(query,history,audience,sources,reports):
|
|
212 |
print(f"Error logging on Azure Blob Storage: {e}")
|
213 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
214 |
|
215 |
-
|
216 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
|
|
|
|
|
218 |
if doc.metadata["chunk_type"] == "image":
|
219 |
try:
|
220 |
key = f"Image {i+1}"
|
|
|
221 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
222 |
img = get_image_from_azure_blob_storage(image_path)
|
223 |
|
@@ -225,31 +265,18 @@ async def chat(query,history,audience,sources,reports):
|
|
225 |
buffered = BytesIO()
|
226 |
img.save(buffered, format="PNG")
|
227 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
|
|
|
|
|
|
|
|
228 |
|
229 |
-
# Embedding the base64 string in Markdown
|
230 |
-
markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
|
231 |
-
image_dict[key] = {"img":img,"md":markdown_image,"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"]}
|
232 |
except Exception as e:
|
233 |
print(f"Skipped adding image {i} because of {e}")
|
|
|
|
|
|
|
234 |
|
235 |
-
|
236 |
-
|
237 |
-
gallery = [x["img"] for x in list(image_dict.values())]
|
238 |
-
img = list(image_dict.values())[0]
|
239 |
-
img_md = img["md"]
|
240 |
-
img_caption = img["caption"]
|
241 |
-
img_code = img["figure_code"]
|
242 |
-
if img_code != "N/A":
|
243 |
-
img_name = f"{img['key']} - {img['figure_code']}"
|
244 |
-
else:
|
245 |
-
img_name = f"{img['key']}"
|
246 |
-
|
247 |
-
history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
248 |
-
# answer_yet = history[-1][1] + f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"
|
249 |
-
# history[-1] = (history[-1][0],answer_yet)
|
250 |
-
# history = [tuple(x) for x in history]
|
251 |
-
|
252 |
-
yield history,docs_html,output_query,output_language,gallery#,output_query,output_keywords
|
253 |
|
254 |
|
255 |
def save_feedback(feed: str, user_id):
|
@@ -382,6 +409,9 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
382 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
383 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
384 |
docs_textbox = gr.State("")
|
|
|
|
|
|
|
385 |
|
386 |
# with Modal(visible = False) as config_modal:
|
387 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
@@ -415,6 +445,10 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
415 |
output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
|
416 |
|
417 |
|
|
|
|
|
|
|
|
|
418 |
|
419 |
#---------------------------------------------------------------------------------------
|
420 |
# OTHER TABS
|
@@ -463,13 +497,13 @@ with gr.Blocks(title="Climate Q&A", css_paths=os.getcwd()+ "/style.css", theme=t
|
|
463 |
|
464 |
(textbox
|
465 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
466 |
-
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_textbox")
|
467 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
468 |
)
|
469 |
|
470 |
(examples_hidden
|
471 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
472 |
-
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component],concurrency_limit = 8,api_name = "chat_examples")
|
473 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
474 |
)
|
475 |
|
|
|
32 |
# ClimateQ&A imports
|
33 |
from climateqa.engine.llm import get_llm
|
34 |
from climateqa.engine.vectorstore import get_pinecone_vectorstore
|
35 |
+
# from climateqa.knowledge.retriever import ClimateQARetriever
|
36 |
from climateqa.engine.reranker import get_reranker
|
37 |
from climateqa.engine.embeddings import get_embeddings_function
|
38 |
from climateqa.engine.chains.prompts import audience_prompts
|
|
|
43 |
# from climateqa.engine.chains.answer_rag import make_rag_papers_chain
|
44 |
from climateqa.engine.graph import make_graph_agent,display_graph
|
45 |
|
46 |
+
from front.utils import make_html_source, make_html_figure_sources,parse_output_llm_with_sources,serialize_docs,make_toolbox
|
47 |
|
48 |
# Load environment variables in local mode
|
49 |
try:
|
|
|
133 |
output_keywords = ""
|
134 |
gallery = []
|
135 |
start_streaming = False
|
136 |
+
figures = '<div class="figures-container"> <p> Go to the "Figures" tab at the top of the page to see full size images </p> </div>'
|
137 |
|
138 |
steps_display = {
|
139 |
"categorize_intent":("🔄️ Analyzing user message",True),
|
|
|
152 |
try:
|
153 |
docs = event["data"]["output"]["documents"]
|
154 |
docs_html = []
|
155 |
+
textual_docs = [d for d in docs if d.metadata["chunk_type"] == "text"]
|
156 |
+
for i, d in enumerate(textual_docs, 1):
|
157 |
+
if d.metadata["chunk_type"] == "text":
|
158 |
+
docs_html.append(make_html_source(d, i))
|
159 |
|
160 |
+
used_documents = used_documents + [f"{d.metadata['short_name']} - {d.metadata['name']}" for d in docs]
|
161 |
history[-1].content = "Adding sources :\n\n - " + "\n - ".join(np.unique(used_documents))
|
162 |
|
163 |
docs_html = "".join(docs_html)
|
|
|
187 |
if event["name"] == "categorize_intent" and event["event"] == "on_chain_start":
|
188 |
print("X")
|
189 |
|
190 |
+
yield history,docs_html,output_query,output_language,gallery, figures #,output_query,output_keywords
|
191 |
|
192 |
except Exception as e:
|
193 |
print(event, "has failed")
|
|
|
215 |
print(f"Error logging on Azure Blob Storage: {e}")
|
216 |
raise gr.Error(f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)")
|
217 |
|
218 |
+
|
219 |
+
|
220 |
+
|
221 |
+
# image_dict = {}
|
222 |
+
# for i,doc in enumerate(docs):
|
223 |
+
|
224 |
+
# if doc.metadata["chunk_type"] == "image":
|
225 |
+
# try:
|
226 |
+
# key = f"Image {i+1}"
|
227 |
+
# image_path = doc.metadata["image_path"].split("documents/")[1]
|
228 |
+
# img = get_image_from_azure_blob_storage(image_path)
|
229 |
+
|
230 |
+
# # Convert the image to a byte buffer
|
231 |
+
# buffered = BytesIO()
|
232 |
+
# img.save(buffered, format="PNG")
|
233 |
+
# img_str = base64.b64encode(buffered.getvalue()).decode()
|
234 |
+
|
235 |
+
# # Embedding the base64 string in Markdown
|
236 |
+
# markdown_image = f"![Alt text](data:image/png;base64,{img_str})"
|
237 |
+
# image_dict[key] = {"img":img,"md":markdown_image,"short_name": doc.metadata["short_name"],"figure_code":doc.metadata["figure_code"],"caption":doc.page_content,"key":key,"figure_code":doc.metadata["figure_code"], "img_str" : img_str}
|
238 |
+
# except Exception as e:
|
239 |
+
# print(f"Skipped adding image {i} because of {e}")
|
240 |
+
|
241 |
+
# if len(image_dict) > 0:
|
242 |
+
|
243 |
+
# gallery = [x["img"] for x in list(image_dict.values())]
|
244 |
+
# img = list(image_dict.values())[0]
|
245 |
+
# img_md = img["md"]
|
246 |
+
# img_caption = img["caption"]
|
247 |
+
# img_code = img["figure_code"]
|
248 |
+
# if img_code != "N/A":
|
249 |
+
# img_name = f"{img['key']} - {img['figure_code']}"
|
250 |
+
# else:
|
251 |
+
# img_name = f"{img['key']}"
|
252 |
+
|
253 |
+
# history.append(ChatMessage(role="assistant", content = f"\n\n{img_md}\n<p class='chatbot-caption'><b>{img_name}</b> - {img_caption}</p>"))
|
254 |
|
255 |
+
docs_figures = [d for d in docs if d.metadata["chunk_type"] == "image"]
|
256 |
+
for i, doc in enumerate(docs_figures):
|
257 |
if doc.metadata["chunk_type"] == "image":
|
258 |
try:
|
259 |
key = f"Image {i+1}"
|
260 |
+
|
261 |
image_path = doc.metadata["image_path"].split("documents/")[1]
|
262 |
img = get_image_from_azure_blob_storage(image_path)
|
263 |
|
|
|
265 |
buffered = BytesIO()
|
266 |
img.save(buffered, format="PNG")
|
267 |
img_str = base64.b64encode(buffered.getvalue()).decode()
|
268 |
+
|
269 |
+
figures = figures + make_html_figure_sources(doc, i, img_str)
|
270 |
+
|
271 |
+
gallery.append(img)
|
272 |
|
|
|
|
|
|
|
273 |
except Exception as e:
|
274 |
print(f"Skipped adding image {i} because of {e}")
|
275 |
+
|
276 |
+
|
277 |
+
|
278 |
|
279 |
+
yield history,docs_html,output_query,output_language,gallery, figures#,output_query,output_keywords
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
|
281 |
|
282 |
def save_feedback(feed: str, user_id):
|
|
|
409 |
with gr.Tab("Sources",elem_id = "tab-citations",id = 1):
|
410 |
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
411 |
docs_textbox = gr.State("")
|
412 |
+
|
413 |
+
|
414 |
+
|
415 |
|
416 |
# with Modal(visible = False) as config_modal:
|
417 |
with gr.Tab("Configuration",elem_id = "tab-config",id = 2):
|
|
|
445 |
output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
|
446 |
|
447 |
|
448 |
+
with gr.Tab("Figures",elem_id = "tab-figures",id = 3):
|
449 |
+
figures_cards = gr.HTML(show_label=False, elem_id="sources-figures")
|
450 |
+
|
451 |
+
|
452 |
|
453 |
#---------------------------------------------------------------------------------------
|
454 |
# OTHER TABS
|
|
|
497 |
|
498 |
(textbox
|
499 |
.submit(start_chat, [textbox,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_textbox")
|
500 |
+
.then(chat, [textbox,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component,figures_cards],concurrency_limit = 8,api_name = "chat_textbox")
|
501 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_textbox")
|
502 |
)
|
503 |
|
504 |
(examples_hidden
|
505 |
.change(start_chat, [examples_hidden,chatbot], [textbox,tabs,chatbot],queue = False,api_name = "start_chat_examples")
|
506 |
+
.then(chat, [examples_hidden,chatbot,dropdown_audience, dropdown_sources,dropdown_reports], [chatbot,sources_textbox,output_query,output_language,gallery_component, figures_cards],concurrency_limit = 8,api_name = "chat_examples")
|
507 |
.then(finish_chat, None, [textbox],api_name = "finish_chat_examples")
|
508 |
)
|
509 |
|
front/utils.py
CHANGED
@@ -55,7 +55,7 @@ def make_html_source(source,i):
|
|
55 |
score = meta['reranking_score']
|
56 |
if score > 0.8:
|
57 |
color = "score-green"
|
58 |
-
elif score > 0.
|
59 |
color = "score-orange"
|
60 |
else:
|
61 |
color = "score-red"
|
@@ -91,8 +91,9 @@ def make_html_source(source,i):
|
|
91 |
<div class="card card-image">
|
92 |
<div class="card-content">
|
93 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
94 |
-
<p>{content}</p>
|
95 |
<p class='ai-generated'>AI-generated description</p>
|
|
|
|
|
96 |
{relevancy_score}
|
97 |
</div>
|
98 |
<div class="card-footer">
|
@@ -107,6 +108,53 @@ def make_html_source(source,i):
|
|
107 |
return card
|
108 |
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
112 |
|
|
|
55 |
score = meta['reranking_score']
|
56 |
if score > 0.8:
|
57 |
color = "score-green"
|
58 |
+
elif score > 0.5:
|
59 |
color = "score-orange"
|
60 |
else:
|
61 |
color = "score-red"
|
|
|
91 |
<div class="card card-image">
|
92 |
<div class="card-content">
|
93 |
<h2>Image {i} - {title} - Page {int(meta['page_number'])}</h2>
|
|
|
94 |
<p class='ai-generated'>AI-generated description</p>
|
95 |
+
<p>{content}</p>
|
96 |
+
|
97 |
{relevancy_score}
|
98 |
</div>
|
99 |
<div class="card-footer">
|
|
|
108 |
return card
|
109 |
|
110 |
|
111 |
+
def make_html_figure_sources(source,i,img_str):
    """Build the HTML card shown for one retrieved figure.

    Args:
        source: Document-like object exposing ``metadata`` (a mapping) and
            ``page_content`` (a string). The metadata keys read here are
            ``reranking_score``, ``name``, ``short_name``, ``figure_code``,
            ``page_number`` and ``url``.
        i: Index rendered verbatim in the card heading ("Image {i}").
        img_str: Base64-encoded PNG payload, embedded as a ``data:`` URI.

    Returns:
        The card as an HTML string.

    Raises:
        KeyError: If one of the metadata keys listed above is missing.
    """
    meta = source.metadata
    content = source.page_content.strip()

    # Colour class for the relevancy score: > 0.8 green, > 0.5 orange, else red.
    score = meta['reranking_score']
    if score > 0.8:
        color = "score-green"
    elif score > 0.5:
        color = "score-orange"
    else:
        color = "score-red"

    # The footer shows the plain document name. The former ``toc_levels``
    # branch was dead code (the list was always empty), so it is dropped.
    name = meta['name']
    url = meta['url']
    page_number = int(meta['page_number'])

    relevancy_score = f"<p class=relevancy-score>Relevancy score: <span class='{color}'>{score:.1%}</span></p>"

    # "N/A" is the placeholder used when a figure has no code of its own.
    if meta["figure_code"] != "N/A":
        title = f"{meta['figure_code']} - {meta['short_name']}"
    else:
        title = f"{meta['short_name']}"

    # The src attribute must be closed before alt; otherwise " alt=" becomes
    # part of the data URI and the alt text is lost.
    card = f"""
    <div class="card card-image">
        <div class="card-content">
            <h2>Image {i} - {title} - Page {page_number}</h2>
            <p class='ai-generated'>AI-generated description</p>
            <img src="data:image/png;base64,{img_str}" alt="Alt text" />

            <p>{content}</p>

            {relevancy_score}
        </div>
        <div class="card-footer">
            <span>{name}</span>
            <a href="{url}#page={page_number}" target="_blank" class="pdf-link">
                <span role="img" aria-label="Open PDF">🔗</span>
            </a>
        </div>
    </div>
    """
    return card
|
156 |
+
|
157 |
+
|
158 |
|
159 |
def make_toolbox(tool_name,description = "",checked = False,elem_id = "toggle"):
|
160 |
|
sandbox/20240310 - CQA - Semantic Routing 1.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
style.css
CHANGED
@@ -206,6 +206,11 @@ label.selected{
|
|
206 |
overflow-y: auto !important;
|
207 |
}
|
208 |
|
|
|
|
|
|
|
|
|
|
|
209 |
div#tab-config{
|
210 |
height:calc(100vh - 190px) !important;
|
211 |
overflow-y: auto !important;
|
@@ -475,7 +480,7 @@ span.chatbot > p > img{
|
|
475 |
color:orange !important;
|
476 |
}
|
477 |
|
478 |
-
.score-
|
479 |
color:red !important;
|
480 |
}
|
481 |
.message-buttons-left.panel.message-buttons.with-avatar {
|
|
|
206 |
overflow-y: auto !important;
|
207 |
}
|
208 |
|
209 |
+
div#sources-figures{
|
210 |
+
height:calc(100vh - 190px) !important;
|
211 |
+
overflow-y: auto !important;
|
212 |
+
}
|
213 |
+
|
214 |
div#tab-config{
|
215 |
height:calc(100vh - 190px) !important;
|
216 |
overflow-y: auto !important;
|
|
|
480 |
color:orange !important;
|
481 |
}
|
482 |
|
483 |
+
.score-red{
|
484 |
color:red !important;
|
485 |
}
|
486 |
.message-buttons-left.panel.message-buttons.with-avatar {
|