Update pages/3_Earnings_Semantic_Search_π_.py
Browse files
pages/3_Earnings_Semantic_Search_π_.py
CHANGED
@@ -12,21 +12,9 @@ top_k = st.sidebar.slider("Number of Top Hits Generated",min_value=1,max_value=5
|
|
12 |
|
13 |
window_size = st.sidebar.slider("Number of Sentences Generated in Search Response",min_value=1,max_value=5,value=3)
|
14 |
|
15 |
-
|
|
|
16 |
|
17 |
-
with st.expander("See Transcribed Earnings Text"):
|
18 |
-
st.write(f"Number of Sentences: {len(earnings_sentences)}")
|
19 |
-
|
20 |
-
st.write(earnings_passages)
|
21 |
-
|
22 |
-
|
23 |
-
## Save to a dataframe for ease of visualization
|
24 |
-
sen_df = pd.DataFrame(earnings_sentiment)
|
25 |
-
sen_df['text'] = earnings_sentences
|
26 |
-
grouped = pd.DataFrame(sen_df['label'].value_counts()).reset_index()
|
27 |
-
grouped.columns = ['sentiment','count']
|
28 |
-
|
29 |
-
|
30 |
passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
|
31 |
|
32 |
##### Semantic Search #####
|
@@ -46,18 +34,28 @@ cross_scores = cross_encoder.predict(cross_inp)
|
|
46 |
for idx in range(len(cross_scores)):
|
47 |
hits[idx]['cross-score'] = cross_scores[idx]
|
48 |
|
49 |
-
# Output of top-3 hits from bi-encoder
|
50 |
-
st.markdown("\n-------------------------\n")
|
51 |
-
st.subheader(f"Top-{top_k} Bi-Encoder Retrieval hits")
|
52 |
-
hits = sorted(hits, key=lambda x: x['score'], reverse=True)
|
53 |
-
|
54 |
-
cross_df = display_df_as_table(hits,top_k)
|
55 |
-
st.write(cross_df.to_html(index=False), unsafe_allow_html=True)
|
56 |
-
|
57 |
# Output of top hits from re-ranker
|
58 |
-
st.markdown("\n-------------------------\n")
|
59 |
-
st.subheader(f"Top-{top_k} Cross-Encoder Re-ranker hits")
|
60 |
hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
|
61 |
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
window_size = st.sidebar.slider("Number of Sentences Generated in Search Response",min_value=1,max_value=5,value=3)
|
14 |
|
15 |
+
## Save to a dataframe for ease of visualization
|
16 |
+
sen_df = st.session_state['sen_def']
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
passages = preprocess_plain_text(st.session_state['earnings_passages'],window_size=window_size)
|
19 |
|
20 |
##### Semantic Search #####
|
|
|
34 |
for idx in range(len(cross_scores)):
|
35 |
hits[idx]['cross-score'] = cross_scores[idx]
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
# Output of top hits from re-ranker
|
|
|
|
|
38 |
hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
|
39 |
|
40 |
+
score='cross-score'
|
41 |
+
df = pd.DataFrame([(hit[score],passages[hit['corpus_id']]) for hit in hits[0:2]],columns=['Score','Text'])
|
42 |
+
df['Score'] = round(df['Score'],2)
|
43 |
+
|
44 |
+
tag_list = []
|
45 |
+
|
46 |
+
def gen_annotated_text(para):
|
47 |
+
for i in sent_tokenize(para):
|
48 |
+
label = sen_df.loc[sen_df['text']==i, 'label'].values[0]
|
49 |
+
if label == 'Negative':
|
50 |
+
tag_list.append((i,label,'#faa'))
|
51 |
+
elif label == 'Positive':
|
52 |
+
tag_list.append((i,label,'#afa'))
|
53 |
+
else:
|
54 |
+
tag_list.append((i,label,'#fea'))
|
55 |
+
return tag_list
|
56 |
+
|
57 |
+
text_to_annotate = [gen_annotated_text(para) for para in df.Text.tolist()]
|
58 |
+
|
59 |
+
for i in text_to_annotate:
|
60 |
+
annotated_text(i)
|
61 |
+
|