nickmuchi committed on
Commit
d68b53b
·
1 Parent(s): c3a625a

Update pages/3_Earnings_Semantic_Search_🔎_.py

Browse files
pages/3_Earnings_Semantic_Search_🔎_.py CHANGED
@@ -12,21 +12,9 @@ top_k = st.sidebar.slider("Number of Top Hits Generated",min_value=1,max_value=5
12
 
13
  window_size = st.sidebar.slider("Number of Sentences Generated in Search Response",min_value=1,max_value=5,value=3)
14
 
15
- earnings_sentiment, earnings_sentences = sentiment_pipe(earnings_passages)
 
16
 
17
- with st.expander("See Transcribed Earnings Text"):
18
- st.write(f"Number of Sentences: {len(earnings_sentences)}")
19
-
20
- st.write(earnings_passages)
21
-
22
-
23
- ## Save to a dataframe for ease of visualization
24
- sen_df = pd.DataFrame(earnings_sentiment)
25
- sen_df['text'] = earnings_sentences
26
- grouped = pd.DataFrame(sen_df['label'].value_counts()).reset_index()
27
- grouped.columns = ['sentiment','count']
28
-
29
-
30
  passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
31
 
32
  ##### Sematic Search #####
@@ -46,18 +34,28 @@ cross_scores = cross_encoder.predict(cross_inp)
46
  for idx in range(len(cross_scores)):
47
  hits[idx]['cross-score'] = cross_scores[idx]
48
 
49
- # Output of top-3 hits from bi-encoder
50
- st.markdown("\n-------------------------\n")
51
- st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
52
- hits = sorted(hits, key=lambda x: x['score'], reverse=True)
53
-
54
- cross_df = display_df_as_table(hits,top_k)
55
- st.write(cross_df.to_html(index=False), unsafe_allow_html=True)
56
-
57
  # Output of top-3 hits from re-ranker
58
- st.markdown("\n-------------------------\n")
59
- st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
60
  hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
61
 
62
- rerank_df = display_df_as_table(hits,top_k,'cross-score')
63
- st.write(rerank_df.to_html(index=False), unsafe_allow_html=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  window_size = st.sidebar.slider("Number of Sentences Generated in Search Response",min_value=1,max_value=5,value=3)
14
 
15
+ ## Save to a dataframe for ease of visualization
16
+ sen_df = st.session_state['sen_def']
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
19
 
20
  ##### Sematic Search #####
 
34
  for idx in range(len(cross_scores)):
35
  hits[idx]['cross-score'] = cross_scores[idx]
36
 
 
 
 
 
 
 
 
 
37
  # Output of top-3 hits from re-ranker
 
 
38
  hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
39
 
40
+ score='cross-score'
41
+ df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in hits[0:2]],columns=['Score','Text'])
42
+ df['Score'] = round(df['Score'],2)
43
+
44
+ tag_list = []
45
+
46
+ def gen_annotated_text(para):
47
+ for i in sent_tokenize(para):
48
+ label = sen_df.loc[sen_df['text']==i, 'label'].values[0]
49
+ if label == 'Negative':
50
+ tag_list.append((i,label,'#faa'))
51
+ elif label == 'Positive':
52
+ tag_list.append((i,label,'#afa'))
53
+ else:
54
+ tag_list.append((i,label,'#fea'))
55
+ return tag_list
56
+
57
+ text_to_annotate = [gen_annotated_text(para) for para in df.Text.tolist()]
58
+
59
+ for i in text_to_annotate:
60
+ annotated_text(i)
61
+