nickmuchi committed on
Commit
64af83f
·
1 Parent(s): 770e1bd

Update pages/3_Earnings_Semantic_Search_🔎_.py

Browse files
pages/3_Earnings_Semantic_Search_🔎_.py CHANGED
@@ -1,29 +1,62 @@
1
  import streamlit as st
2
  from functions import *
 
 
 
3
 
4
  st.set_page_config(page_title="Earnings Question/Answering", page_icon="πŸ”Ž")
 
5
  st.sidebar.header("Semantic Search")
6
- st.markdown("## Earnings Semantic Search with LangChain, OpenAI & SBert")
7
 
8
- def gen_sentiment(text):
9
- '''Generate sentiment of given text'''
10
- return sent_pipe(text)[0]['label']
11
 
12
- def gen_annotated_text(df):
13
- '''Generate annotated text'''
 
14
 
15
- tag_list=[]
16
- for row in df.itertuples():
17
- label = row[2]
18
- text = row[1]
19
- if label == 'Positive':
20
- tag_list.append((text,label,'#8fce00'))
21
- elif label == 'Negative':
22
- tag_list.append((text,label,'#f44336'))
23
- else:
24
- tag_list.append((text,label,'#000000'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- return tag_list
 
 
 
 
 
 
 
 
 
 
27
 
28
  bi_enc_dict = {'mpnet-base-v2':"all-mpnet-base-v2",
29
  'instructor-base': 'hkunlp/instructor-base'}
@@ -33,8 +66,10 @@ search_input = st.text_input(
33
 
34
  sbert_model_name = st.sidebar.selectbox("Embedding Model", options=list(bi_enc_dict.keys()), key='sbox')
35
 
36
- chunk_size = st.sidebar.slider("Number of Chars per Chunk of Text",min_value=500,max_value=2000,value=1000)
37
- overlap_size = st.sidebar.slider("Number of Overlap Chars in Search Response",min_value=50,max_value=300,value=50)
 
 
38
 
39
  try:
40
 
@@ -47,6 +82,24 @@ try:
47
 
48
  title = st.session_state['title']
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  embedding_model = bi_enc_dict[sbert_model_name]
51
 
52
  with st.spinner(
 
1
  import streamlit as st
2
  from functions import *
3
+ from langchain.chains import QAGenerationChain
4
+ import itertools
5
+
6
 
7
  st.set_page_config(page_title="Earnings Question/Answering", page_icon="πŸ”Ž")
8
+
9
  st.sidebar.header("Semantic Search")
 
10
 
11
+ st.markdown("## Earnings Semantic Search with LangChain, OpenAI & SBert")
 
 
12
 
13
+ st.markdown(
14
+ """
15
+ <style>
16
 
17
+ #MainMenu {visibility: hidden;
18
+ # }
19
+ footer {visibility: hidden;
20
+ }
21
+ .css-card {
22
+ border-radius: 0px;
23
+ padding: 30px 10px 10px 10px;
24
+ background-color: black;
25
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
26
+ margin-bottom: 10px;
27
+ font-family: "IBM Plex Sans", sans-serif;
28
+ }
29
+
30
+ .card-tag {
31
+ border-radius: 0px;
32
+ padding: 1px 5px 1px 5px;
33
+ margin-bottom: 10px;
34
+ position: absolute;
35
+ left: 0px;
36
+ top: 0px;
37
+ font-size: 0.6rem;
38
+ font-family: "IBM Plex Sans", sans-serif;
39
+ color: white;
40
+ background-color: green;
41
+ }
42
+
43
+ .css-zt5igj {left:0;
44
+ }
45
+
46
+ span.css-10trblm {margin-left:0;
47
+ }
48
 
49
+ div.css-1kyxreq {margin-top: -40px;
50
+ }
51
+
52
+
53
+
54
+
55
+
56
+ </style>
57
+ """,
58
+ unsafe_allow_html=True,
59
+ )
60
 
61
  bi_enc_dict = {'mpnet-base-v2':"all-mpnet-base-v2",
62
  'instructor-base': 'hkunlp/instructor-base'}
 
66
 
67
  sbert_model_name = st.sidebar.selectbox("Embedding Model", options=list(bi_enc_dict.keys()), key='sbox')
68
 
69
+ chunk_size = 1000
70
+ overlap_size = 50
71
+
72
+
73
 
74
  try:
75
 
 
82
 
83
  title = st.session_state['title']
84
 
85
+ earnings_text = ','.join(st.session_state['earnings_passages'])
86
+
87
+ st.session_state.eval_set = generate_eval(
88
+ earnings_text, 10, 3000)
89
+
90
+ # Display the question-answer pairs in the sidebar with smaller text
91
+ for i, qa_pair in enumerate(st.session_state.eval_set):
92
+ st.sidebar.markdown(
93
+ f"""
94
+ <div class="css-card">
95
+ <span class="card-tag">Question {i + 1}</span>
96
+ <p style="font-size: 12px;">{qa_pair['question']}</p>
97
+ <p style="font-size: 12px;">{qa_pair['answer']}</p>
98
+ </div>
99
+ """,
100
+ unsafe_allow_html=True,
101
+ )
102
+
103
  embedding_model = bi_enc_dict[sbert_model_name]
104
 
105
  with st.spinner(