# mtyrrell's picture
# v2.1 added RAG summary by group
# b125eed
# raw
# history blame
# 6.83 kB
import streamlit as st
import os
import pkg_resources
# # Using this wacky hack to get around the massively ridiculous loading order of the managed environment
# def is_installed(package_name, version):
# try:
# pkg = pkg_resources.get_distribution(package_name)
# return pkg.version == version
# except pkg_resources.DistributionNotFound:
# return False
# # shifted from below - this must be the first streamlit call; otherwise: problems
# st.set_page_config(page_title = 'Vulnerability Analysis',
# initial_sidebar_state='expanded', layout="wide")
# @st.cache_resource # cache the function so it's not called every time app.py is triggered
# def install_packages():
# install_commands = []
# if not is_installed("spaces", "0.12.0"):
# install_commands.append("pip install spaces==0.17.0")
# if not is_installed("pydantic", "1.8.2"):
# install_commands.append("pip install pydantic==1.8.2")
# if not is_installed("typer", "0.4.0"):
# install_commands.append("pip install typer==0.4.0")
# if install_commands:
# os.system(" && ".join(install_commands))
# # install packages if necessary
# install_packages()
import appStore.vulnerability_analysis as vulnerability_analysis
import appStore.target as target_analysis
import appStore.doc_processing as processing
from utils.uploadAndExample import add_upload
from utils.vulnerability_classifier import label_dict
import pandas as pd
import plotly.express as px
# Page-level settings: browser-tab title, wide layout, and a sidebar that
# starts out expanded.
st.set_page_config(
    layout="wide",
    initial_sidebar_state='expanded',
    page_title='Vulnerability Analysis',
)
with st.sidebar:
    # Document source selector. The selected option string is passed to
    # add_upload unchanged, so the two option labels must stay exactly as-is.
    #
    # The help text is assembled from adjacent string literals instead of a
    # backslash continuation inside the quotes: with the continuation, any
    # indentation on the following source line became part of the tooltip.
    # "a example" is also corrected to "an example".
    choice = st.sidebar.radio(
        label='Select the Document',
        help=(
            'You can upload the document '
            'or else you can try an example document'
        ),
        options=('Upload Document', 'Try Example'),
        horizontal=True,
    )
    add_upload(choice)
# Centred page title (raw HTML, hence unsafe_allow_html) followed by a
# one-space write that acts as a vertical spacer.
title_area = st.container()
title_area.markdown(
    "<h2 style='text-align: center;'> Vulnerability Analysis 2.0 </h2>",
    unsafe_allow_html=True,
)
title_area.write(' ')
# Collapsed-by-default "About" panel describing the app and its pipeline.
#
# Both texts are built from adjacent string literals with explicit spaces and
# newlines. The previous triple-quoted versions used backslash continuations,
# two of which had no space before the backslash ("open-source\" and
# "*Pre-processing*.\"), so whether the neighbouring words were separated
# depended on how the next source line happened to be indented. Apart from
# those two inserted spaces the text is reproduced unchanged, including the
# leading and trailing newlines.
with st.expander("ℹ️ - About this app", expanded=False):
    st.write(
        "\n"
        "The Vulnerability Analysis App is an open-source "
        "digital tool which aims to assist policy analysts and "
        "other users in extracting and filtering references "
        "to different groups in vulnerable situations from public documents. "
        "We use Natural Language Processing (NLP), specifically deep "
        "learning-based text representations to search context-sensitively "
        "for mentions of the special needs of groups in vulnerable situations\n"
        "to cluster them thematically.\n"
    )
    # Each "- Step" item must begin on its own line to render as a list entry.
    st.write(
        "\n"
        "What Happens in background?\n"
        "- Step 1: Once the document is provided to app, it undergoes *Pre-processing*. "
        "In this step the document is broken into smaller paragraphs "
        "(based on word/sentence count).\n"
        "- Step 2: The paragraphs are then fed to the **Vulnerability Classifier** which detects if\n"
        "the paragraph contains any or multiple references to vulnerable groups.\n"
    )
    # Empty write used as a spacer.
    st.write("")
# Pipeline stages, executed in list order when the button is pressed:
# document processing, then vulnerability analysis, then target analysis.
apps = [processing.app, vulnerability_analysis.app, target_analysis.app]
# Fraction of the progress bar that one completed stage accounts for.
multiplier_val = 1 / len(apps)

analyze_clicked = st.button("Analyze Document")
if analyze_clicked:
    progress_bar = st.progress(0.0)
    # Counting from 1 lets the bar show the share of stages already finished.
    for stages_done, run_stage in enumerate(apps, start=1):
        run_stage()
        progress_bar.progress(stages_done * multiplier_val)
# Render both result views only when 'key0' exists in the session state.
# NOTE(review): 'key0' is presumably written by the analysis stages above;
# confirm against the appStore modules.
has_results = 'key0' in st.session_state
if has_results:
    for render_view in (
        vulnerability_analysis.vulnerability_display,
        target_analysis.target_display,
    ):
        render_view()
# ###################################################################
# #with st.sidebar:
# # topic = st.radio(
# # "Which category you want to explore?",
# # (['Vulnerability', 'Concrete targets/actions/measures']))
# #if topic == 'Vulnerability':
# # Assign dataframe a name
# df_vul = st.session_state['key0']
# st.write(df_vul)
# col1, col2 = st.columns([1,1])
# with col1:
# # Header
# st.subheader("Explore references to vulnerable groups:")
# # Text
# num_paragraphs = len(df_vul['Vulnerability Label'])
# num_references = df_vul['Vulnerability Label'].apply(lambda x: 'Other' not in x).sum()
# st.markdown(f"""<div style="text-align: justify;"> The document contains a
# total of <span style="color: red;">{num_paragraphs}</span> paragraphs.
# We identified <span style="color: red;">{num_references}</span>
# references to vulnerable groups.</div>
# <br>
# In the pie chart on the right you can see the distribution of the different
# groups defined. For a more detailed view in the text, see the paragraphs and
# their respective labels in the table below.</div>""", unsafe_allow_html=True)
# with col2:
# ### Bar chart
# # # Create a df that stores all the labels
# df_labels = pd.DataFrame(list(label_dict.items()), columns=['Label ID', 'Label'])
# # Count how often each label appears in the "Vulnerability Labels" column
# group_counts = {}
# # Iterate through each sublist
# for index, row in df_vul.iterrows():
# # Iterate through each group in the sublist
# for sublist in row['Vulnerability Label']:
# # Update the count in the dictionary
# group_counts[sublist] = group_counts.get(sublist, 0) + 1
# # Create a new dataframe from group_counts
# df_label_count = pd.DataFrame(list(group_counts.items()), columns=['Label', 'Count'])
# # Merge the label counts with the df_label DataFrame
# df_label_count = df_labels.merge(df_label_count, on='Label', how='left')
# st.write("df_label_count")
# # # Configure graph
# # fig = px.pie(df_labels,
# # names="Label",
# # values="Count",
# # title='Label Counts',
# # hover_name="Count",
# # color_discrete_sequence=px.colors.qualitative.Plotly
# # )
# # #Show plot
# # st.plotly_chart(fig, use_container_width=True)
# # ### Table
# st.table(df_vul[df_vul['Vulnerability Label'] != 'Other'])
# vulnerability_analysis.vulnerability_display()
# elif topic == 'Action':
# policyaction.action_display()
# else:
# policyaction.policy_display()
#st.write(st.session_state.key0)