geodata-harvester-app

Sleeping

File size: 12,910 Bytes

# Geodata-Harvester App
# Description: Streamlit app for the Geodata-Harvester package
# Author: Sebastian Haan

import streamlit as st
import sys
import os
import pandas as pd
import numpy as np
import yaml
import shutil
from types import SimpleNamespace
from PIL import Image
# Import the core package Geodata-Harvester
from geodata_harvester import harvest


## Limitations:
# The Google Earth Engine extension for Geodata-Harvester is not natively supported in Streamlit
# unless you run the app locally or provide a GEE service account token.

# Harvest output folder and the zip archive that is offered for download.
OUTPATH = 'harvest_collection'
FNAME_ZIP = f'{OUTPATH}.zip'

# GitHub repository of the Geodata-Harvester package.
_GITHUB_REPO = 'https://github.com/Sydney-Informatics-Hub/geodata-harvester/'

# Link to the example data folder.
data_template_link = 'https://huggingface.co/spaces/SIH/geodata-harvester-app/tree/main/data'
# Link to the settings templates (sub-folder of the data folder).
settings_template_link = f'{data_template_link}/templates_settings'
# Link to the project's GitHub page.
link_to_githubpage = 'https://sydney-informatics-hub.github.io/geodata-harvester/'
# Link to the settings overview document.
link_to_settings_overview = f'{_GITHUB_REPO}blob/main/quarto/docs/Settings_Overview.md'

# Entries passed to st.set_page_config(menu_items=...).
menu_dict = dict([
    ("About", link_to_githubpage),
    ("Get help", _GITHUB_REPO),
])

def init_header():
    """Render the page header: title, tagline, logo and introduction text."""
    st.title('Geodata-Harvester App')
    st.subheader("Jumpstart your geospatial analysis.")
    st.image(Image.open('assets/dataharvester_streamlit_logo_blue.jpg'))
    # The sentences are joined by the same eight-space gap the original
    # line-continued literal contained, so the emitted text is unchanged.
    gap = " " * 8
    intro = gap.join((
        "The Geodata-Harvester automates geodata download and spatio-temporal processing from a large range of datasources into ready-made datasets.",
        "This application is a lightweight Streamlit wrapper crafted to work seamlessly with the Geodata-Harvester package.",
        f"For full accessibility to a comprehensive set of features and options, please visit the [Geodata-Harvester project]({link_to_githubpage}).",
    ))
    st.write(intro)

def init_howto():
    """Render the numbered 'How to' instructions, one paragraph per step."""
    st.subheader('How to')
    steps = (
        f'1) Prepare a settings file that lists all requested layers and spatio-temporal settings (see [templates]({settings_template_link})). For more info please see section below.',
        '2) Upload the settings file in the sidebar.',
        '3) Optional: Select csv file including Latitude and Longitude points. This will enable the Geodata-Harvester to automatically populate the table with extracted data for all given locations.',
        '4) Then click `Run Geodata-Harvester` on the top of the sidebar.',
        '5) Download and processing of all data might take a couple of minutes. Once completed, you will be able to download all generated results as zip.',
    )
    for step in steps:
        st.write(step)

def print_attr():
    """Render the attribution and acknowledgement paragraphs."""
    st.subheader('Attributions and Acknowledgements')
    paragraphs = (
        'This software was developed by the Sydney Informatics Hub, a core research facility of the University of Sydney.',
        'Acknowledgments are an important way for us to demonstrate the value we bring to your research. Your research outcomes are vital for ongoing funding of the Sydney Informatics Hub.',
        'If you make use of this software for your research project, please include the following acknowledgment:',
        '“This research was supported by the Sydney Informatics Hub, a Core Research Facility of the University of Sydney."',
    )
    for paragraph in paragraphs:
        st.write(paragraph)

def load_settings(settings_file):
    """Parse an uploaded YAML settings file into a settings namespace.

    Args:
        settings_file: File-like object (or YAML text) with the settings,
            or None when nothing has been uploaded.

    Returns:
        A SimpleNamespace with one attribute per top-level settings key, or
        None when ``settings_file`` is None. ``date_min`` and ``date_max``
        are converted to strings, ``outpath`` is always set to OUTPATH, and
        ``infile`` is set to the session's 'data_file' when one is present.

    Raises:
        ValueError: If the file does not contain a YAML mapping (for example
            an empty file).
    """
    if settings_file is None:
        return None
    # Rewind first: a file object that was already parsed once would otherwise
    # be read from its end and yield no settings.
    if hasattr(settings_file, 'seek') and getattr(settings_file, 'seekable', lambda: True)():
        settings_file.seek(0)
    # NOTE(review): the settings file is user-uploaded. yaml.FullLoader is kept
    # to preserve behaviour, but yaml.safe_load is the recommended loader for
    # untrusted input -- consider switching.
    raw_settings = yaml.load(settings_file, Loader=yaml.FullLoader)
    if not isinstance(raw_settings, dict):
        raise ValueError(
            'Settings file must contain a YAML mapping of setting names to values.')
    # Parse settings dictionary as namespace
    settings = SimpleNamespace(**raw_settings)
    # Dates are passed on as text, whatever type the YAML parser produced.
    settings.date_min = str(settings.date_min)
    settings.date_max = str(settings.date_max)
    data_file = st.session_state.get('data_file')
    if data_file is not None:
        settings.infile = data_file
    settings.outpath = OUTPATH
    return settings

def print_settings(settings):
    """Write every loaded setting to the page; do nothing for a falsy value.

    The ``target_sources`` entry is expanded into one line per source, all
    other entries are written as a single ``settings.<name> : <value>`` line.
    """
    if not settings:
        return
    st.header("Settings loaded:")
    for name, value in settings.__dict__.items():
        if name != "target_sources":
            st.write(f"settings.{name} : {value}")
            continue
        st.write(f"settings.{name}:")
        sources = settings.target_sources
        for source in sources:
            st.write(f"   '{source}': {sources[source]}")


# Example settings file shown in the "Settings Overview" panel.
# Entries below `target_sources` are indented so that the example parses as the
# nested mapping (source name -> requested layers) the settings file describes.
SETTINGS_EXAMPLE_YAML = """\
#Bounding Box as (lng_min, lat_min, lng_max, lat_max):
#If not provided, the Geodata-Harvester will try to infer bbox from points in Locations file
target_bbox: [149, -30, 149.9, -29]

#Select start date:
date_min: 2023-01-01

#Select end date:
date_max: 2023-02-01

#Spatial Resolution [in arcsec]:
target_res: 100.0

# Number of time interval slices in given date range
temp_intervals: 1

#Headername of Latitude in input file (only relevant if data file provided):
colname_lat: Lat

#Headername of Longitude in input file (only relevant if data file provided):
colname_lng: Long

target_sources:
  #Satellite data from Digital Earth Australia (optional)
  DEA:
  - landsat_barest_earth

  #National Digital Elevation Model (DEM) 1 Second (optional)
  DEM:
  - DEM
  - Slope
  - Aspect

  #Landscape Data (optional)
  Landscape:
  - Relief_300m

  #Radiometric Data (optional)
  Radiometric:
  - radmap2019_grid_dose_terr_awags_rad_2019
  - radmap2019_grid_dose_terr_filtered_awags_rad_2019

  # SILO Climate Data (optional)
  # temporal aggregation options: 'mean', 'median', 'sum', 'std', 'perc95', 'perc5', 'max', 'min'
  SILO:
    max_temp:
    - Median
    min_temp:
    - Median

  #Soil data from SLGA (optional)
  SLGA:
    Bulk_Density:
    - 0-5cm
    Clay:
    - 0-5cm
"""


def print_info():
    """Render the 'Settings Overview' and 'Data Overview' help sections."""
    st.subheader('Settings Overview')
    st.write('The Geodata-Harvester is controlled by a settings file in YAML format, \
        which includes all user-defined settings and data layers for extraction and processing.')
    # f-string so the templates URL is interpolated into the markdown link.
    st.write(f'Example settings file can be found in the [templates]({settings_template_link}).')
    st.write('The settings file includes the following sections:')
    # Shown as a YAML code block so the indentation of the example is kept.
    st.code(SETTINGS_EXAMPLE_YAML, language='yaml')
    st.subheader('Data Overview')
    # Data Overview list
    st.markdown("""
    The following data sources are currently supported by the Geodata-Harvester Streamlit app:
    - Soil Data 3D SLGA (Australia)
    - SILO Climate Database (Australia)
    - National Digital Elevation Model incl. Slope, Aspect (Australia)
    - Digital Earth Australia Geoscience Earth Observations (incl. Sentinel, Landsat, MODIS for Australia)
    - Radiometric Data (Australia)
    - Landscape Data (Australia)
    For more information, please visit the [Geodata-Harvester Data Overview](https://github.com/Sydney-Informatics-Hub/geodata-harvester/blob/main/quarto/docs/Data_Overview.md).
    Note that Google Earth Engine is not supported in streamlit app unless you have a GEE token or run the app locally.
    """)

def load_data(data_file):
    """Load a CSV file into a pandas DataFrame.

    Args:
        data_file: Path or file-like object accepted by ``pd.read_csv``,
            or None when no file is available.

    Returns:
        The parsed DataFrame, or None when ``data_file`` is None.
    """
    if data_file is None:
        return None
    # Rewind file objects so the same upload can be loaded more than once;
    # an already-consumed buffer would otherwise look like an empty file.
    if hasattr(data_file, 'seek') and getattr(data_file, 'seekable', lambda: True)():
        data_file.seek(0)
    return pd.read_csv(data_file)

def show_results():
    """Read the results CSV named in session state and display it as a table."""
    results_path = st.session_state['fname_results']
    results_table = pd.read_csv(results_path)
    st.write('Extracted Data Table:')
    st.write(results_table)

def zip_folder(folder_path):
    """Pack the contents of ``folder_path`` into the zip archive FNAME_ZIP.

    Args:
        folder_path: Directory whose contents are archived.
    """
    # make_archive appends ".zip" itself, so pass the name without extension.
    # splitext strips only the final extension, unlike splitting on the first dot.
    archive_base = os.path.splitext(FNAME_ZIP)[0]
    shutil.make_archive(archive_base, "zip", folder_path)

def gee_init():
    """Add the optional Google Earth Engine token field to the sidebar (experimental).

    The entered text, stripped of surrounding whitespace, is stored in the
    EARTHENGINE_TOKEN environment variable on every call.
    """
    sidebar = st.sidebar
    sidebar.header('Google Earth Engine (Optional)')
    sidebar.write('Please copy your Google Earth Engine authentication token in the field below.')
    token = sidebar.text_input(
        label="Enter Google Earth Engine Token:",
        type="password",
    )
    os.environ['EARTHENGINE_TOKEN'] = token.strip()

def open_sidebar():
    """Build the sidebar and report which of its buttons was clicked.

    The uploaded settings and locations files are stored in session state
    under 'settings_file' and 'data_file'.

    Returns:
        Tuple ``(show_settings_clicked, show_data_clicked, run_clicked)``.
    """
    sidebar = st.sidebar

    # Run section
    sidebar.header('Run Geodata-Harvester')
    sidebar.write('This will harvest geodata as specified in the settings file below.')
    run_clicked = sidebar.button('Run Geodata-Harvester')
    sidebar.divider()

    # Settings (yaml) upload section
    sidebar.header('Settings File')
    uploaded_settings = sidebar.file_uploader(
        f"Choose a yaml settings file (see [templates]({settings_template_link}))")
    show_settings_clicked = sidebar.button('Show Settings')
    st.session_state['settings_file'] = uploaded_settings
    sidebar.divider()

    # Locations (csv) upload section
    sidebar.header('Locations File')
    uploaded_data = sidebar.file_uploader(
        f"Upload a CSV file with Latitudes and Longitudes (see [example data csv]({data_template_link}))")
    show_data_clicked = sidebar.button('Show Input Data')
    st.session_state['data_file'] = uploaded_data
    sidebar.divider()

    # Optional Google Earth Engine token section
    gee_init()

    return show_settings_clicked, show_data_clicked, run_clicked

def run_harvester():
    """Run the Geodata-Harvester with the settings file stored in session state.

    The loaded settings are written to ``OUTPATH/settings.yaml`` and that
    file is passed to ``harvest.run``. When session state holds a
    'data_file2' path, it is used as the settings' ``infile``.

    Returns:
        Whatever ``harvest.run(..., return_df=True)`` returns, or None (after
        showing an error message) when no settings file has been uploaded.
    """
    # .get() instead of `(key in state) & (state[key] is not None)`: the bitwise
    # `&` evaluates both operands, so the lookup raised KeyError for a missing key.
    settings_file = st.session_state.get('settings_file')
    if settings_file is None:
        st.error('Please upload settings file.')
        return None

    settings = load_settings(settings_file)
    infile = st.session_state.get('data_file2')
    if infile is not None:
        settings.infile = infile

    os.makedirs(name=OUTPATH, exist_ok=True)
    # write settings to disk
    fname_settings = os.path.join(OUTPATH, 'settings.yaml')
    with open(fname_settings, 'w') as file:
        yaml.dump(settings.__dict__, file)
    with st.spinner('Running Geodata-Harvester...'):
        df = harvest.run(fname_settings, return_df=True)
    st.success('Harvest complete!', icon="✅")
    return df



##########   Main  ###########

# Scratch copy of the uploaded locations file, handed to the harvester by path.
_TEMP_DATA_CSV = 'temp_data.csv'


def _stage_locations_file():
    """Save the uploaded locations CSV to disk and record its path as 'data_file2'."""
    data_file = st.session_state.get('data_file')
    if data_file is None:
        st.session_state['data_file2'] = None
        return
    # save data so it can be read by geopandas later
    df = load_data(data_file)
    # index=False: the scratch file keeps exactly the uploaded columns.
    df.to_csv(_TEMP_DATA_CSV, index=False)
    st.session_state['data_file2'] = _TEMP_DATA_CSV


def _harvest_and_package():
    """Run the harvester, zip the output folder and record the result paths."""
    try:
        _stage_locations_file()
        run_harvester()
        # zip result folder in python and make available for download
        with st.spinner(f'Generating {FNAME_ZIP} file for download...'):
            zip_folder(OUTPATH)
            st.session_state['fname_results_zip'] = FNAME_ZIP
            st.success('Zip file generated!', icon="✅")
    finally:
        # delete temp data file, also when the harvest fails part-way
        if os.path.exists(_TEMP_DATA_CSV):
            os.remove(_TEMP_DATA_CSV)
    st.session_state['fname_results'] = os.path.join(OUTPATH, 'results.csv')


def _show_result_downloads():
    """Offer the zip download and the result table once a harvest has finished."""
    if 'fname_results_zip' not in st.session_state:
        return
    st.write('Harvester Results:')
    # The archive lives on disk, so it may be gone even though the session
    # still remembers a finished run.
    if os.path.exists(FNAME_ZIP):
        with open(FNAME_ZIP, 'rb') as f:
            st.download_button('Download Results (zip)', f, file_name=FNAME_ZIP)
    else:
        st.warning('Result archive not found. Please run the Geodata-Harvester again.')
    fname_results = st.session_state.get('fname_results')
    if fname_results is not None and os.path.exists(fname_results):
        if st.button('Show Result Table'):
            show_results()


def main():
    """Build the page and react to the sidebar buttons."""
    st.set_page_config(page_title="Geodata-Harvester App",
        layout="centered",
        menu_items=menu_dict)
    init_header()
    with st.expander("How to use this app"):
        init_howto()
    with st.expander("Settings and Data Overview"):
        print_info()
    with st.expander("Attributions and Acknowledgements"):
        print_attr()
    # Placeholder element kept from the original page layout (currently unused).
    st.empty()
    button_show_settings, button_show_data, button_run = open_sidebar()

    if button_show_settings:
        settings = load_settings(st.session_state['settings_file'])
        print_settings(settings)

    if button_show_data:
        df = load_data(st.session_state['data_file'])
        st.header("Data Table:")
        st.write(df)

    if button_run:
        # .get() instead of `(key in state) & (state[key] is not None)`: the
        # bitwise `&` does not short-circuit, so it could not guard the lookup.
        if st.session_state.get('settings_file') is not None:
            _harvest_and_package()
        else:
            st.text('No settings file provided. Please add settings file.')

    # Show options and buttons after run
    _show_result_downloads()


# Script entry point: build the app page when this file is executed directly
# (the guard keeps main() from running when the module is merely imported).
if __name__ == "__main__":
    main()


#### Debugging options
#st.write('Streamlit version:', st.__version__)
#st.write('Session state:', st.session_state)
#for key in st.session_state:
#    st.write(f"{key}: {st.session_state[key]}")