Spaces:

nickmuchi
/

Earnings-Call-Analysis-Whisperer

Running

App Files Files Community

nickmuchi committed on Oct 5, 2022

Commit

d68b53b

1 Parent(s): c3a625a

Update pages/3_Earnings_Semantic_Search_🔎_.py

Browse files

Files changed (1) hide show

pages/3_Earnings_Semantic_Search_🔎_.py +24 -26

pages/3_Earnings_Semantic_Search_🔎_.py CHANGED Viewed

@@ -12,21 +12,9 @@ top_k = st.sidebar.slider("Number of Top Hits Generated",min_value=1,max_value=5
 window_size = st.sidebar.slider("Number of Sentences Generated in Search Response",min_value=1,max_value=5,value=3)
-    earnings_sentiment, earnings_sentences = sentiment_pipe(earnings_passages)
-    with st.expander("See Transcribed Earnings Text"):
-        st.write(f"Number of Sentences: {len(earnings_sentences)}")
-        st.write(earnings_passages)
-    ## Save to a dataframe for ease of visualization
-    sen_df = pd.DataFrame(earnings_sentiment)
-    sen_df['text'] = earnings_sentences
-    grouped = pd.DataFrame(sen_df['label'].value_counts()).reset_index()
-    grouped.columns = ['sentiment','count']
 passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
 ##### Semantic Search #####
@@ -46,18 +34,28 @@ cross_scores = cross_encoder.predict(cross_inp)
 for idx in range(len(cross_scores)):
     hits[idx]['cross-score'] = cross_scores[idx]
-# Output of top-3 hits from bi-encoder
-st.markdown("\n-------------------------\n")
-st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
-hits = sorted(hits, key=lambda x: x['score'], reverse=True)
-cross_df = display_df_as_table(hits,top_k)
-st.write(cross_df.to_html(index=False), unsafe_allow_html=True)
 # Output of top-3 hits from re-ranker
-st.markdown("\n-------------------------\n")
-st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
 hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
-rerank_df = display_df_as_table(hits,top_k,'cross-score')
-st.write(rerank_df.to_html(index=False), unsafe_allow_html=True

 window_size = st.sidebar.slider("Number of Sentences Generated in Search Response",min_value=1,max_value=5,value=3)
+## Save to a dataframe for ease of visualization
+sen_df = st.session_state['sen_def']
 passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
 ##### Semantic Search #####
 for idx in range(len(cross_scores)):
     hits[idx]['cross-score'] = cross_scores[idx]
 # Output of top-3 hits from re-ranker
 hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
+score='cross-score'
+df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in hits[0:2]],columns=['Score','Text'])
+df['Score'] = round(df['Score'],2)
+tag_list = []
+def gen_annotated_text(para):
+    """Tag each sentence of ``para`` with its sentiment label and a colour.
+
+    Args:
+        para: Passage text; it is split into sentences with ``sent_tokenize``.
+
+    Returns:
+        A new list with one entry per sentence, in order. A sentence found in
+        ``sen_df['text']`` becomes a ``(sentence, label, colour)`` tuple; a
+        sentence with no exact match is kept as a plain string.
+    """
+    # Build a fresh list on every call. Appending to the shared module-level
+    # list made every call return the same ever-growing object, so each
+    # paragraph's result also contained the sentences of the other paragraphs.
+    tagged = []
+    for sentence in sent_tokenize(para):
+        matches = sen_df.loc[sen_df['text'] == sentence, 'label'].values
+        if len(matches) == 0:
+            # No exact text match: keep the sentence unannotated instead of
+            # raising IndexError on ``.values[0]``.
+            tagged.append(sentence)
+            continue
+        label = matches[0]
+        if label == 'Negative':
+            colour = '#faa'
+        elif label == 'Positive':
+            colour = '#afa'
+        else:
+            # Any other label shares the third highlight colour.
+            colour = '#fea'
+        tagged.append((sentence, label, colour))
+    return tagged
+text_to_annotate = [gen_annotated_text(para) for para in df.Text.tolist()]
+for i in text_to_annotate:
+    annotated_text(i)