xai_framework / utils /data_users.py
hodorfi's picture
Upload 1288 files
191195c
raw
history blame
7.1 kB
import streamlit as st
import os
# Directory that holds the pre-rendered figure images. It is resolved against
# the process working directory once, at import time, and the value already
# ends with a trailing slash.
ROOT_FIG_DIR = os.getcwd() + '/figures/'
# Static copy shown on the data page. The texts live at module level so their
# content does not depend on the indentation of the rendering code.

_COLLECTION_DETAILS = """As stated OCT images are collected from the Shiley Eye Institute of the University of California San Diego,
the California Retinal Research Foundation,
Medical Center Ophthalmology Associates, the Shanghai First People’s Hospital, and Beijing Tongren Eye Center between
July 1, 2013 and March 1, 2017."""

# Rendered with unsafe_allow_html=True; the content is a fixed literal, never
# user input.
_CASE_EXPLANATIONS_HTML = """<ul>Case explanations:
<li><strong>CNV: </strong>Choroidal neovascularization (CNV) with neovascular membrane (white arrowheads) and associated subretinal fluid (arrows)</li>
<li> <strong style="color:Green;"><em>DRUSEN: </em></strong> Multiple drusen (arrowheads) present in early AMD</li>
<li><strong>DME: </strong>Diabetic macular edema (DME) with retinal-thickening-associated intraretinal fluid</li>
<li>Normal</li>
</ul>"""

_LICENSE_TEXT = """
The files associated with this dataset are licensed under a Creative Commons Attribution 4.0 International license. What does this mean?
You can share, copy and modify this dataset so long as you give appropriate credit,
provide a link to the CC BY license, and indicate if changes were made, but you may not do
so in a way that suggests the rights holder has endorsed you or your use of the dataset.
Note that further permission may be required for any content within the dataset
that is identified as belonging to a third party. More details about the licences can be found
[here](https://creativecommons.org/about/cclicenses/).
"""

_PREPROCESSING_INTRO = """
##
Since the training set is class-imbalanced, we need to solve this issue. To do so, a representative sampling strategy is used with hierarchical clustering.
"""

# Pseudo-code displayed to the reader via st.code; it is never executed.
_SAMPLING_PSEUDOCODE = '''def representative_sampling():
    for each_class in category_list:
        embeddings = get_resnet50_embeddings(each_class)
        n_cluster = run_hierarchical_clustering(embeddings)
        samples = get_representative_n_samples_within_each_cluster(n_cluster)'''


def _figure_path(file_name):
    """Return the path of ``file_name`` inside ``ROOT_FIG_DIR``.

    ``os.path.join`` is used so that the trailing slash already present in
    ``ROOT_FIG_DIR`` does not produce a doubled separator.
    """
    return os.path.join(ROOT_FIG_DIR, file_name)


def _show_distribution_columns(category_figure, aspect_ratio_figure):
    """Show the category-count and aspect-ratio figures side by side."""
    left, right = st.columns((1, 1))
    with left:
        st.caption('Number of Images per Category')
        st.image(_figure_path(category_figure))
    with right:
        st.caption('Aspect ratio')
        st.image(_figure_path(aspect_ratio_figure))


def _render_data_source_tab():
    """Render the 'Data Source Information' tab.

    Covers where the data comes from and how it was collected, what it
    contains, and under which license it may be used.
    """
    st.subheader('Data Source Information')
    st.write("Data set consists of OCT images from CNV, DME, DRUSEN and NORMAL cases (from 4686 adult patients).")

    st.caption('Source')
    st.write("[Labeled Optical Coherence Tomography (OCT) and Chest X-Ray Images for Classification](https://data.mendeley.com/datasets/rscbjbr9sj/3)")
    with st.expander('Data Collection Details(Click for more info)'):
        st.write(_COLLECTION_DETAILS)

    st.caption('Case Samples')
    # Background on CNV:
    # https://www.aao.org/eye-health/ask-ophthalmologist-q/choroidal-neovascularization-definition-treatment
    st.image(_figure_path('oct_details.png'))
    st.markdown(_CASE_EXPLANATIONS_HTML, unsafe_allow_html=True)

    st.caption('License:')
    with st.expander('License: CC BY 4.0 license'):
        st.write(_LICENSE_TEXT)


def _render_data_stats_tab():
    """Render the 'Exploratory Data Stats' tab for the raw train/test sets."""
    st.subheader('Exploratory Data Stats')

    with st.expander('Training Data Info'):
        # The heading is written before the columns are created so that it
        # appears above the charts, as it does in the test expander below.
        st.write("**Raw Data Details**")
        _show_distribution_columns(
            'train_raw_category_dist.png',
            'train_raw_aspectratio_dist.png',
        )
        st.caption('Image Samples')
        st.image(_figure_path('train_samples.png'))

    with st.expander('Test Data Info'):
        st.write("**Raw Data Details**")
        st.caption('Number of Images per Category')
        st.image(_figure_path('test_category_dist.png'))
        st.caption('Aspect ratio')
        st.image(_figure_path('test_aspectratio_dist.png'))
        st.caption('Image Samples')
        # NOTE(review): this shows the *training* sample sheet inside the test
        # expander; presumably a copy-paste slip. Confirm whether a dedicated
        # test sample figure exists before changing the file name.
        st.image(_figure_path('train_samples.png'))


def _render_data_onboarding_tab():
    """Render the 'Data Onboarding' tab describing the pre-processing."""
    st.subheader('Pre-Processing Details')
    st.write(_PREPROCESSING_INTRO)

    st.markdown(
        '<h5 style="color:Black;">Post Processing Steps:</h5>',
        unsafe_allow_html=True,
    )
    st.code(_SAMPLING_PSEUDOCODE, language='python')

    with st.expander('Training Data Info after Representative Sampling'):
        _show_distribution_columns(
            'train_filter_category_dist.png',
            'train_filter_aspectratio_dist.png',
        )
        st.caption('Image Samples')
        st.image(_figure_path('train_samples.png'))

    st.write("Model Input Size Resizing: 180x180x3")


def get_product_dev_page_layout():
    """Render the data page as three Streamlit tabs.

    The tabs are, in order: "Data Source Information", "Exploratory Data
    Stats" and "Data Onboarding". Each tab is drawn by its own private
    helper. The function only emits Streamlit elements and returns ``None``.
    """
    # NOTE(review): the nesting of expanders/columns inside each tab was
    # reconstructed from statement order; verify against the running app.
    source_tab, stats_tab, onboarding_tab = st.tabs(
        ["Data Source Information", "Exploratory Data Stats", "Data Onboarding"]
    )
    with source_tab:
        _render_data_source_tab()
    with stats_tab:
        _render_data_stats_tab()
    with onboarding_tab:
        _render_data_onboarding_tab()
def get_developer_page_layout():
    """Render the "Developer" page: a header followed by a numbered list.

    The list text is passed to ``st.markdown`` as a single string that starts
    and ends with a newline. The function returns ``None``.
    """
    steps = (
        "1. For the [GitHub repository](https://github.com/giswqs/streamlit-multipage-template) or [use it as a template](https://github.com/giswqs/streamlit-multipage-template/generate) for your own project.",
        "2. Customize the sidebar by changing the sidebar text and logo in each Python files.",
        "3. Find your favorite emoji from https://emojipedia.org.",
    )
    st.header("Developer")
    # The empty first and last items supply the leading and trailing newline.
    st.markdown("\n".join(("", *steps, "")))