Spaces:

nickmuchi
/

Earnings-Call-Analysis-Whisperer

Running

App Files Files Community

nickmuchi committed on Aug 12, 2023

Commit

8e77d9f

1 Parent(s): 805e19a

Update pages/3_Earnings_Semantic_Search_🔎_.py

Browse files

Files changed (1) hide show

pages/3_Earnings_Semantic_Search_🔎_.py +75 -52

pages/3_Earnings_Semantic_Search_🔎_.py CHANGED Viewed

@@ -10,6 +10,8 @@ st.sidebar.header("Semantic Search")
 st.markdown("Earnings Semantic Search with LangChain, OpenAI & SBert")
 st.markdown(
     """
     <style>
@@ -59,7 +61,8 @@ st.markdown(
 )
 bi_enc_dict = {'mpnet-base-v2':"all-mpnet-base-v2",
-              'instructor-base': 'hkunlp/instructor-base'}
 search_input = st.text_input(
         label='Enter Your Search Query',value= "What key challenges did the business face?", key='search')
@@ -73,69 +76,89 @@ overlap_size = 50
 try:
-    if search_input:
-        if "sen_df" in st.session_state and "earnings_passages" in st.session_state:
-            ## Save to a dataframe for ease of visualization
-            sen_df = st.session_state['sen_df']
-            title = st.session_state['title']
-            earnings_text = st.session_state['earnings_passages']
-            print(f'earnings_to_be_embedded:{earnings_text}')
-            st.session_state.eval_set = generate_eval(
-            earnings_text, 10, 3000)
-            # Display the question-answer pairs in the sidebar with smaller text
-            for i, qa_pair in enumerate(st.session_state.eval_set):
-                st.sidebar.markdown(
-                    f"""
-                    <div class="css-card">
-                    <span class="card-tag">Question {i + 1}</span>
-                        <p style="font-size: 12px;">{qa_pair['question']}</p>
-                        <p style="font-size: 12px;">{qa_pair['answer']}</p>
-                    </div>
-                    """,
-                    unsafe_allow_html=True,
                 )
-            embedding_model = bi_enc_dict[sbert_model_name]
-            with st.spinner(
-                text=f"Loading {embedding_model} embedding model and Generating Response..."
-            ):
-                docsearch = process_corpus(earnings_text,title, embedding_model)
-                result = embed_text(search_input,docsearch)
-            references = [doc.page_content for doc in result['source_documents']]
-            answer = result['answer']
-            sentiment_label = gen_sentiment(answer)
-            ##### Sematic Search #####
-            df = pd.DataFrame.from_dict({'Text':[answer],'Sentiment':[sentiment_label]})
-            text_annotations = gen_annotated_text(df)[0]
-            with st.expander(label='Query Result', expanded=True):
-                annotated_text(text_annotations)
-            with st.expander(label='References from Corpus used to Generate Result'):
-                for ref in references:
-                    st.write(ref)
-        else:
-            st.write('Please ensure you have entered the YouTube URL or uploaded the Earnings Call file')
     else:

 st.markdown("Earnings Semantic Search with LangChain, OpenAI & SBert")
+starter_message = "Ask me anything about the Earnings Call!"
 st.markdown(
     """
     <style>
 )
 bi_enc_dict = {'mpnet-base-v2':"all-mpnet-base-v2",
+              'instructor-base': 'hkunlp/instructor-base',
+              'FlagEmbedding': 'BAAI/bge-base-en'}
 search_input = st.text_input(
         label='Enter Your Search Query',value= "What key challenges did the business face?", key='search')
 try:
+    if "sen_df" in st.session_state and "earnings_passages" in st.session_state:
+        ## Save to a dataframe for ease of visualization
+        sen_df = st.session_state['sen_df']
+        title = st.session_state['title']
+        earnings_text = st.session_state['earnings_passages']
+        print(f'earnings_to_be_embedded:{earnings_text}')
+        st.session_state.eval_set = generate_eval(
+        earnings_text, 10, 3000)
+        # Display the question-answer pairs in the sidebar with smaller text
+        for i, qa_pair in enumerate(st.session_state.eval_set):
+            st.sidebar.markdown(
+                f"""
+                <div class="css-card">
+                <span class="card-tag">Question {i + 1}</span>
+                    <p style="font-size: 12px;">{qa_pair['question']}</p>
+                    <p style="font-size: 12px;">{qa_pair['answer']}</p>
+                </div>
+                """,
+                unsafe_allow_html=True,
+            )
+        embedding_model = bi_enc_dict[sbert_model_name]
+        with st.spinner(
+            text=f"Loading {embedding_model} embedding model and Generating Response..."
+        ):
+            docsearch = create_vectorstore(earnings_text,title, embedding_model)
+            memory, agent_executor = create_memory_and_agent(search_input,docsearch)
+        if "messages" not in st.session_state or st.sidebar.button("Clear message history"):
+            st.session_state["messages"] = [AIMessage(content=starter_message)]
+        for msg in st.session_state.messages:
+            if isinstance(msg, AIMessage):
+                st.chat_message("assistant").write(msg.content)
+            elif isinstance(msg, HumanMessage):
+                st.chat_message("user").write(msg.content)
+            memory.chat_memory.add_message(msg)
+        if user_question := st.chat_input(placeholder=starter_message):
+            st.chat_message("user").write(user_question)
+            with st.chat_message("assistant"):
+                st_callback = StreamlitCallbackHandler(st.container())
+                response = agent_executor(
+                    {"input": user_question, "history": st.session_state.messages},
+                    callbacks=[st_callback],
+                    include_run_info=True,
                 )
+                answer = response["output"]
+                st.session_state.messages.append(AIMessage(content=answer))
+                st.write(answer)
+                memory.save_context({"input": user_question}, response)
+                st.session_state["messages"] = memory.buffer
+                run_id = response["__run"].run_id
+                col_blank, col_text, col1, col2 = st.columns([10, 2, 1, 1])
+            with st.expander(label='Query Result with Sentiment Tag', expanded=True):
+                sentiment_label = gen_sentiment(answer)
+                df = pd.DataFrame.from_dict({'Text':[answer],'Sentiment':[sentiment_label]})
+                text_annotations = gen_annotated_text(df)[0]
+                annotated_text(text_annotations)
     else: