mtyrrell committed on
Commit
b125eed
·
1 Parent(s): aa9a3c7

v2.1 added RAG summary by group

Browse files
.DS_Store ADDED
Binary file (8.2 kB). View file
 
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ civ_v2/
app.py CHANGED
@@ -2,36 +2,36 @@ import streamlit as st
2
  import os
3
  import pkg_resources
4
 
5
- # Using this wacky hack to get around the massively ridicolous managed env loading order
6
- def is_installed(package_name, version):
7
- try:
8
- pkg = pkg_resources.get_distribution(package_name)
9
- return pkg.version == version
10
- except pkg_resources.DistributionNotFound:
11
- return False
12
-
13
- # shifted from below - this must be the first streamlit call; otherwise: problems
14
- st.set_page_config(page_title = 'Vulnerability Analysis',
15
- initial_sidebar_state='expanded', layout="wide")
16
-
17
- @st.cache_resource # cache the function so it's not called every time app.py is triggered
18
- def install_packages():
19
- install_commands = []
20
-
21
- if not is_installed("spaces", "0.12.0"):
22
- install_commands.append("pip install spaces==0.17.0")
23
 
24
- if not is_installed("pydantic", "1.8.2"):
25
- install_commands.append("pip install pydantic==1.8.2")
26
 
27
- if not is_installed("typer", "0.4.0"):
28
- install_commands.append("pip install typer==0.4.0")
29
 
30
- if install_commands:
31
- os.system(" && ".join(install_commands))
32
 
33
- # install packages if necessary
34
- install_packages()
35
 
36
  import appStore.vulnerability_analysis as vulnerability_analysis
37
  import appStore.target as target_analysis
@@ -41,8 +41,8 @@ from utils.vulnerability_classifier import label_dict
41
  import pandas as pd
42
  import plotly.express as px
43
 
44
- #st.set_page_config(page_title = 'Vulnerability Analysis',
45
- # initial_sidebar_state='expanded', layout="wide")
46
 
47
  with st.sidebar:
48
  # upload and example doc
@@ -54,7 +54,7 @@ with st.sidebar:
54
  add_upload(choice)
55
 
56
  with st.container():
57
- st.markdown("<h2 style='text-align: center; color: black;'> Vulnerability Analysis 2.0 </h2>", unsafe_allow_html=True)
58
  st.write(' ')
59
 
60
  with st.expander("ℹ️ - About this app", expanded=False):
 
2
  import os
3
  import pkg_resources
4
 
5
+ # # Using this wacky hack to get around the massively ridicolous managed env loading order
6
+ # def is_installed(package_name, version):
7
+ # try:
8
+ # pkg = pkg_resources.get_distribution(package_name)
9
+ # return pkg.version == version
10
+ # except pkg_resources.DistributionNotFound:
11
+ # return False
12
+
13
+ # # shifted from below - this must be the first streamlit call; otherwise: problems
14
+ # st.set_page_config(page_title = 'Vulnerability Analysis',
15
+ # initial_sidebar_state='expanded', layout="wide")
16
+
17
+ # @st.cache_resource # cache the function so it's not called every time app.py is triggered
18
+ # def install_packages():
19
+ # install_commands = []
20
+
21
+ # if not is_installed("spaces", "0.12.0"):
22
+ # install_commands.append("pip install spaces==0.17.0")
23
 
24
+ # if not is_installed("pydantic", "1.8.2"):
25
+ # install_commands.append("pip install pydantic==1.8.2")
26
 
27
+ # if not is_installed("typer", "0.4.0"):
28
+ # install_commands.append("pip install typer==0.4.0")
29
 
30
+ # if install_commands:
31
+ # os.system(" && ".join(install_commands))
32
 
33
+ # # install packages if necessary
34
+ # install_packages()
35
 
36
  import appStore.vulnerability_analysis as vulnerability_analysis
37
  import appStore.target as target_analysis
 
41
  import pandas as pd
42
  import plotly.express as px
43
 
44
+ st.set_page_config(page_title = 'Vulnerability Analysis',
45
+ initial_sidebar_state='expanded', layout="wide")
46
 
47
  with st.sidebar:
48
  # upload and example doc
 
54
  add_upload(choice)
55
 
56
  with st.container():
57
+ st.markdown("<h2 style='text-align: center;'> Vulnerability Analysis 2.0 </h2>", unsafe_allow_html=True)
58
  st.write(' ')
59
 
60
  with st.expander("ℹ️ - About this app", expanded=False):
appStore/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes). View file
 
appStore/__pycache__/doc_processing.cpython-310.pyc ADDED
Binary file (3.18 kB). View file
 
appStore/__pycache__/rag.cpython-310.pyc ADDED
Binary file (1.81 kB). View file
 
appStore/__pycache__/target.cpython-310.pyc ADDED
Binary file (2.8 kB). View file
 
appStore/__pycache__/vulnerability_analysis.cpython-310.pyc ADDED
Binary file (4.78 kB). View file
 
appStore/rag.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ # import json
3
+ import numpy as np
4
+ import pandas as pd
5
+ import openai
6
+ from haystack.schema import Document
7
+ import streamlit as st
8
+ from tenacity import retry, stop_after_attempt, wait_random_exponential
9
+
10
+
11
+ # Get openai API key
12
+ openai.api_key = os.environ["OPENAI_API_KEY"]
13
+ model_select = "gpt-3.5-turbo-1106"
14
+
15
+
16
+ # define a special function for putting the prompt together (as we can't use haystack)
17
+ def get_prompt(context):
18
+ base_prompt="Summarize the following context efficiently in bullet points, the less the better. \
19
+ Summarize only activities that address the vulnerability of the given context to climate change. \
20
+ Formatting example: \
21
+ - Collect and utilize gender-disaggregated data to inform and improve climate change adaptation efforts. \
22
+ - Prioritize gender sensitivity in adaptation options, ensuring participation and benefits for women, who are more vulnerable to climate impacts. \
23
+ "
24
+
25
+ # Add the meta data for references
26
+ # context = ' - '.join([d.content for d in docs])
27
+ prompt = base_prompt+"; Context: "+context+"; Answer:"
28
+
29
+ return prompt
30
+
31
+
32
+ # # convert df rows to Document object so we can feed it into the summarizer easily
33
+ # def get_document(df):
34
+ # # we take a list of each extract
35
+ # ls_dict = []
36
+ # for index, row in df.iterrows():
37
+ # # Create a Document object for each row (we only need the text)
38
+ # doc = Document(
39
+ # row['text'],
40
+ # meta={
41
+ # 'label': row['Vulnerability Label']}
42
+ # )
43
+ # # Append the Document object to the documents list
44
+ # ls_dict.append(doc)
45
+
46
+ # return ls_dict
47
+
48
+
49
+ # exception handling for issuing multiple API calls to openai (exponential backoff)
50
@retry(
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(6),
    # Surface the underlying exception after the last attempt instead of
    # tenacity's RetryError wrapper, so the real API error is visible.
    reraise=True,
)
def completion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with retry and random exponential backoff.

    Up to 6 attempts are made, waiting a randomised, exponentially growing
    interval (bounded by 1 and 60 seconds) between them.

    Args:
        **kwargs: Forwarded unchanged to ``openai.ChatCompletion.create``.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns for the given
        arguments.

    Raises:
        The exception raised by the final failed attempt.
    """
    return openai.ChatCompletion.create(**kwargs)
53
+
54
+
55
+ # construct RAG query, send to openai and process response
56
def run_query(df):
    """Stream a model-generated summary of ``df`` into the Streamlit page.

    The context is wrapped by ``get_prompt``, sent as a single user message
    via ``completion_with_backoff`` with ``stream=True``, and the growing
    answer is re-rendered in one placeholder as chunks arrive.

    Args:
        df: The context to summarise. Despite the name it is handed straight
            to ``get_prompt``, which embeds it in the prompt text.
            NOTE(review): presumably a plain string - confirm against callers.

    Returns:
        The full summary text received from the stream, stripped of
        surrounding whitespace ("" if the stream carried no text).
    """
    # For a non-streamed completion, call the API without ``stream=True`` and
    # read ``res.choices[0].message.content`` instead of iterating chunks.
    response = completion_with_backoff(
        model=model_select,
        messages=[{"role": "user", "content": get_prompt(df)}],
        stream=True,
    )

    res_box = st.empty()  # single placeholder that is overwritten per chunk
    pieces = []
    result = ""  # bound up front so an empty stream still yields a value
    for chunk in response:
        # Streamed chunks carry their payload under 'delta'.
        delta = chunk['choices'][0]['delta']
        # Some chunks carry no text at all; skip them.
        if 'content' not in delta:
            continue
        text = delta['content']
        if text is None:
            continue
        pieces.append(text)
        # Merge the newest text with everything received so far and redraw.
        result = "".join(pieces).strip()
        res_box.success(result)

    return result
80
+
81
+
82
+
83
+
84
+
85
+
86
+
appStore/target.py CHANGED
@@ -17,6 +17,7 @@ from io import BytesIO
17
  import xlsxwriter
18
  import plotly.express as px
19
  from utils.target_classifier import label_dict
 
20
 
21
  # Declare all the necessary variables
22
  classifier_identifier = 'target'
@@ -82,7 +83,40 @@ def app():
82
 
83
  def target_display():
84
 
 
 
85
  # Assign dataframe a name
86
  df = st.session_state['key2']
87
-
88
  st.write(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  import xlsxwriter
18
  import plotly.express as px
19
  from utils.target_classifier import label_dict
20
+ from appStore.rag import run_query
21
 
22
  # Declare all the necessary variables
23
  classifier_identifier = 'target'
 
83
 
84
def target_display():
    """Show the results table, then a streamed summary per vulnerability label.

    Reads the results DataFrame from ``st.session_state['key2']``, writes it
    to the page, explodes the 'Vulnerability Label' column, concatenates the
    'text' of each label with '; ' and calls ``run_query`` once per label.
    """
    ### TABLE Output ###

    df = st.session_state['key2']
    st.write(df)

    ### RAG Output by group ###

    # One row per (row, label) pair.
    # NOTE(review): assumes 'Vulnerability Label' holds list-like values - confirm.
    df_expand = df.explode('Vulnerability Label')
    # Concatenate all text belonging to the same label.
    df_agg = (
        df_expand.groupby('Vulnerability Label')['text']
        .agg('; '.join)
        .reset_index()
    )

    st.markdown("----")
    st.markdown('**DOCUMENT FINDINGS SUMMARY BY VULNERABILITY LABEL:**')

    # Build and send one query per label; run_query renders the response itself.
    for label, text in zip(df_agg['Vulnerability Label'], df_agg['text']):
        st.write(label)
        run_query(text)
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
requirements.txt CHANGED
@@ -19,4 +19,7 @@ altair==4.0
19
  streamlit-aggrid
20
  python-docx
21
  setfit
22
- plotly.express
 
 
 
 
19
  streamlit-aggrid
20
  python-docx
21
  setfit
22
+ plotly.express
23
+ openai==0.27.9
24
+ pydantic==1.8.2
25
+ scikit-learn==1.0.2
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (151 Bytes). View file
 
utils/__pycache__/config.cpython-310.pyc ADDED
Binary file (1.1 kB). View file
 
utils/__pycache__/preprocessing.cpython-310.pyc ADDED
Binary file (9.07 kB). View file
 
utils/__pycache__/target_classifier.cpython-310.pyc ADDED
Binary file (3.6 kB). View file
 
utils/__pycache__/uploadAndExample.cpython-310.pyc ADDED
Binary file (1.22 kB). View file
 
utils/__pycache__/vulnerability_classifier.cpython-310.pyc ADDED
Binary file (4.39 kB). View file