Spaces:
Sleeping
Sleeping
# Geodata-Harvester App | |
# Description: Streamlit app for the Geodata-Harvester package | |
# Author: Sebastian Haan | |
import streamlit as st | |
import sys | |
import os | |
import pandas as pd | |
import numpy as np | |
import yaml | |
import shutil | |
from types import SimpleNamespace | |
from PIL import Image | |
# Import the core package Geodata-Harvester | |
from geodata_harvester import harvest | |
## Limitations:
# The Google Earth Engine extension for Geodata-Harvester is not natively supported in Streamlit,
# unless you run the app locally or provide a GEE service account token.
# Folder that receives the harvested data, and the zip archive offered for download.
OUTPATH = 'harvest_collection'
FNAME_ZIP = 'harvest_collection.zip'

# Link to settings templates
settings_template_link = (
    'https://huggingface.co/spaces/SIH/geodata-harvester-app'
    '/tree/main/data/templates_settings'
)
# Link to data template
data_template_link = (
    'https://huggingface.co/spaces/SIH/geodata-harvester-app'
    '/tree/main/data'
)
# Link to GitHub page (project documentation site)
link_to_githubpage = 'https://sydney-informatics-hub.github.io/geodata-harvester/'
# Link to settings overview
link_to_settings_overview = (
    'https://github.com/Sydney-Informatics-Hub/geodata-harvester'
    '/blob/main/quarto/docs/Settings_Overview.md'
)

# Entries passed to st.set_page_config(menu_items=...).
menu_dict = {
    "About": link_to_githubpage,
    "Get help": "https://github.com/Sydney-Informatics-Hub/geodata-harvester/",
}
def init_header():
    """Render the page title, tagline, logo and the introductory paragraph."""
    st.title('Geodata-Harvester App')
    st.subheader("Jumpstart your geospatial analysis.")
    image = Image.open('assets/dataharvester_streamlit_logo_blue.jpg')
    st.image(image)
    # Build the paragraph with explicit concatenation instead of backslash
    # continuations inside the literal: with continuations the rendered text
    # depends on the source indentation (sentences either fuse together or
    # pick up stray runs of whitespace).
    intro = (
        "The Geodata-Harvester automates geodata download and spatio-temporal "
        "processing from a large range of datasources into ready-made datasets. "
        "This application is a lightweight Streamlit wrapper crafted to work "
        "seamlessly with the Geodata-Harvester package. "
        "For full accessibility to a comprehensive set of features and options, "
        f"please visit the [Geodata-Harvester project]({link_to_githubpage})."
    )
    st.write(intro)
def init_howto():
    """Write the numbered 'How to' instructions, one paragraph per step."""
    st.subheader('How to')
    steps = (
        f'1) Prepare a settings file that lists all requested layers and spatio-temporal settings (see [templates]({settings_template_link})). For more info please see section below.',
        '2) Upload the settings file in the sidebar.',
        '3) Optional: Select csv file including Latitude and Longitude points. This will enable the Geodata-Harvester to automatically populate the table with extracted data for all given locations.',
        '4) Then click `Run Geodata-Harvester` on the top of the sidebar.',
        '5) Download and processing of all data might take a couple of minutes. Once completed, you will be able to download all generated results as zip.',
    )
    for step in steps:
        st.write(step)
def print_attr():
    """Write the attribution and acknowledgement paragraphs."""
    st.subheader('Attributions and Acknowledgements')
    paragraphs = [
        'This software was developed by the Sydney Informatics Hub, a core research facility of the University of Sydney.',
        'Acknowledgments are an important way for us to demonstrate the value we bring to your research. Your research outcomes are vital for ongoing funding of the Sydney Informatics Hub.',
        'If you make use of this software for your research project, please include the following acknowledgment:',
        '“This research was supported by the Sydney Informatics Hub, a Core Research Facility of the University of Sydney."',
    ]
    for paragraph in paragraphs:
        st.write(paragraph)
def load_settings(settings_file):
    """Parse a YAML settings file into a settings namespace.

    Args:
        settings_file: File-like object (or YAML text) holding the settings,
            or None when nothing has been uploaded.

    Returns:
        A SimpleNamespace with one attribute per top-level YAML key, with
        `date_min`/`date_max` converted to strings, `outpath` set to OUTPATH
        and, when a locations file is stored in the session state under
        'data_file', `infile` set to it. Returns None when `settings_file`
        is None or the YAML content is not a mapping.

    Raises:
        AttributeError: if the settings lack `date_min` or `date_max`.
    """
    if settings_file is None:
        return None
    # Rewind so a file object that was already read once parses again
    # instead of yielding an empty document.
    if hasattr(settings_file, 'seek'):
        settings_file.seek(0)
    # NOTE(review): the settings file is supplied by the user; consider
    # yaml.safe_load unless Python-specific YAML tags are actually required.
    parsed = yaml.load(settings_file, Loader=yaml.FullLoader)
    if not isinstance(parsed, dict):
        # An empty or scalar-only document cannot be expanded into attributes.
        st.error('Settings file is empty or not a valid YAML mapping.')
        return None
    # Parse settings dictionary as namespace
    settings = SimpleNamespace(**parsed)
    # YAML turns ISO dates into date objects; keep them as plain strings.
    settings.date_min = str(settings.date_min)
    settings.date_max = str(settings.date_max)
    data_file = st.session_state.get('data_file')
    if data_file is not None:
        settings.infile = data_file
    settings.outpath = OUTPATH
    return settings
def print_settings(settings):
    """Write every loaded setting to the page.

    `target_sources` is expanded into one line per source; all other
    settings are written as a single `name : value` line. Nothing is
    written when `settings` is falsy.
    """
    if not settings:
        return
    st.header("Settings loaded:")
    for name, value in settings.__dict__.items():
        if name != "target_sources":
            st.write(f"settings.{name} : {value}")
            continue
        st.write(f"settings.{name}:")
        for source in settings.target_sources:
            st.write(f" '{source}': {settings.target_sources[source]}")
def print_info():
    """Render the 'Settings Overview' and 'Data Overview' help sections.

    Shows an annotated example settings file and the list of data sources
    supported by the app.
    """
    data_overview_link = (
        'https://github.com/Sydney-Informatics-Hub/geodata-harvester'
        '/blob/main/quarto/docs/Data_Overview.md'
    )

    st.subheader('Settings Overview')
    st.write(
        'The Geodata-Harvester is controlled by a settings file in YAML format, '
        'which includes all user-defined settings and data layers for extraction and processing.'
    )
    # f-string is required here, otherwise the placeholder is shown literally.
    st.write(f'Example settings file can be found in the [templates]({settings_template_link}).')
    st.write('The settings file includes the following sections:')
    # Fenced code block: keeps the YAML nesting intact and stops Markdown
    # from rendering the '#' comment lines as headings.
    st.markdown("""```yaml
#Bounding Box as (lng_min, lat_min, lng_max, lat_max):
#If not provided, the Geodata-Harvester will try to infer bbox from points in Locations file
target_bbox: [149, -30, 149.9, -29]

#Select start date:
date_min: 2023-01-01

#Select end date:
date_max: 2023-02-01

#Spatial Resolution [in arcsec]:
target_res: 100.0

# Number of time interval slices in given date range
temp_intervals: 1

#Headername of Latitude in input file (only relevant if data file provided):
colname_lat: Lat

#Headername of Longitude in input file (only relevant if data file provided):
colname_lng: Long

target_sources:
  #Satellite data from Digital Earth Australia (optional)
  DEA:
    - landsat_barest_earth

  #National Digital Elevation Model (DEM) 1 Second (optional)
  DEM:
    - DEM
    - Slope
    - Aspect

  #Landscape Data (optional)
  Landscape:
    - Relief_300m

  #Radiometric Data (optional)
  Radiometric:
    - radmap2019_grid_dose_terr_awags_rad_2019
    - radmap2019_grid_dose_terr_filtered_awags_rad_2019

  # SILO Climate Data (optional)
  # temporal aggregation options: 'mean', 'median', 'sum', 'std', 'perc95', 'perc5', 'max', 'min'
  SILO:
    max_temp:
      - Median
    min_temp:
      - Median

  #Soil data from SLGA (optional)
  SLGA:
    Bulk_Density:
      - 0-5cm
    Clay:
      - 0-5cm
```""")

    st.subheader('Data Overview')
    # Data Overview list (kept at column 0: indented Markdown would render as code).
    st.markdown(f"""
The following data sources are currently supported by the Geodata-Harvester Streamlit app:

- Soil Data 3D SLGA (Australia)
- SILO Climate Database (Australia)
- National Digital Elevation Model incl. Slope, Aspect (Australia)
- Digital Earth Australia Geoscience Earth Observations (incl. Sentinel, Landsat, MODIS for Australia)
- Radiometric Data (Australia)
- Landscape Data (Australia)

For more information, please visit the [Geodata-Harvester Data Overview]({data_overview_link}).

Note that Google Earth Engine is not supported in streamlit app unless you have a GEE token or run the app locally.
""")
def load_data(data_file):
    """Load a CSV file into a pandas DataFrame.

    Args:
        data_file: Path or file-like object accepted by `pandas.read_csv`,
            or None when no file has been provided.

    Returns:
        The parsed DataFrame, or None when `data_file` is None.
    """
    if data_file is None:
        # Explicit result for "nothing uploaded" instead of an unbound local.
        return None
    return pd.read_csv(data_file)
def show_results():
    """Display the results CSV whose path is stored in the session state.

    Reads the path from `st.session_state['fname_results']`.
    """
    results_path = st.session_state['fname_results']
    results_table = pd.read_csv(results_path)
    st.write('Extracted Data Table:')
    st.write(results_table)
def zip_folder(folder_path):
    """Zip the contents of `folder_path` into the archive named by FNAME_ZIP.

    Args:
        folder_path: Directory whose contents are archived.
    """
    # make_archive appends ".zip" itself, so pass the name without its
    # extension. splitext strips only the final suffix, so a name that
    # contains further dots still produces exactly FNAME_ZIP.
    archive_base, _ = os.path.splitext(FNAME_ZIP)
    shutil.make_archive(archive_base, "zip", folder_path)
def gee_init():
    """Add the optional Google Earth Engine token field to the sidebar (experimental).

    The stripped field content is stored in the EARTHENGINE_TOKEN
    environment variable on every call.
    """
    sidebar = st.sidebar
    sidebar.header('Google Earth Engine (Optional)')
    sidebar.write('Please copy your Google Earth Engine authentication token in the field below.')
    token = sidebar.text_input(
        label="Enter Google Earth Engine Token:",
        type="password",
    )
    os.environ['EARTHENGINE_TOKEN'] = token.strip()
def open_sidebar():
    """Build the sidebar and report which of its buttons were pressed.

    The uploaded settings and locations files are stored in the session
    state under 'settings_file' and 'data_file'.

    Returns:
        Tuple `(show_settings_pressed, show_data_pressed, run_pressed)`.
    """
    sidebar = st.sidebar

    # Run section
    sidebar.header('Run Geodata-Harvester')
    sidebar.write('This will harvest geodata as specified in the settings file below.')
    run_pressed = sidebar.button('Run Geodata-Harvester')
    sidebar.divider()

    # Settings (YAML) upload section
    sidebar.header('Settings File')
    uploaded_settings = sidebar.file_uploader(
        f"Choose a yaml settings file (see [templates]({settings_template_link}))"
    )
    show_settings_pressed = sidebar.button('Show Settings')
    st.session_state['settings_file'] = uploaded_settings
    sidebar.divider()

    # Locations (CSV) upload section
    sidebar.header('Locations File')
    uploaded_data = sidebar.file_uploader(
        f"Upload a CSV file with Latitudes and Longitudes (see [example data csv]({data_template_link}))"
    )
    show_data_pressed = sidebar.button('Show Input Data')
    st.session_state['data_file'] = uploaded_data
    sidebar.divider()

    # Optional Google Earth Engine token field
    gee_init()

    return show_settings_pressed, show_data_pressed, run_pressed
def run_harvester():
    """Run the Geodata-Harvester with the settings stored in the session state.

    Reads the uploaded settings from `st.session_state['settings_file']`,
    optionally overrides `infile` with `st.session_state['data_file2']`,
    writes the resulting settings to `OUTPATH/settings.yaml` and passes that
    file to `harvest.run`.

    Returns:
        Whatever `harvest.run(..., return_df=True)` returns, or None when no
        settings file is available or it could not be loaded.
    """
    # .get() instead of `('key' in state) & (state['key'] is not None)`:
    # `&` does not short-circuit, so the old guard raised on a missing key.
    settings_file = st.session_state.get('settings_file')
    if settings_file is None:
        st.error('Please upload settings file.')
        return None

    settings = load_settings(settings_file)
    if settings is None:
        return None

    infile = st.session_state.get('data_file2')
    if infile is not None:
        settings.infile = infile

    os.makedirs(name=OUTPATH, exist_ok=True)
    # write settings to disk so the harvester can read them from a file
    fname_settings = os.path.join(OUTPATH, 'settings.yaml')
    with open(fname_settings, 'w') as file:
        yaml.dump(settings.__dict__, file)

    with st.spinner('Running Geodata-Harvester...'):
        df = harvest.run(fname_settings, return_df=True)
    st.success('Harvest complete!', icon="✅")
    return df
########## Main ########### | |
# Temporary on-disk copy of the uploaded locations file, for the harvester to read.
_TEMP_DATA_CSV = 'temp_data.csv'


def _harvest_and_package():
    """Run the harvester on the uploaded inputs and zip the output folder."""
    if st.session_state.get('settings_file') is None:
        st.text('No settings file provided. Please add settings file.')
        return

    data_file = st.session_state.get('data_file')
    if data_file is not None:
        # save data so it can be read from a path by the harvester later
        load_data(data_file).to_csv(_TEMP_DATA_CSV)
        st.session_state['data_file2'] = _TEMP_DATA_CSV
    else:
        st.session_state['data_file2'] = None

    try:
        run_harvester()
    finally:
        # delete temp data file, also when the harvest raised
        if os.path.exists(_TEMP_DATA_CSV):
            os.remove(_TEMP_DATA_CSV)

    if not os.path.isdir(OUTPATH):
        # Nothing was harvested (e.g. settings could not be loaded).
        return

    # zip result folder and make it available for download
    with st.spinner(f'Generating {FNAME_ZIP} file for download...'):
        zip_folder(OUTPATH)
    st.session_state['fname_results_zip'] = FNAME_ZIP
    st.session_state['fname_results'] = os.path.join(OUTPATH, 'results.csv')
    st.success('Zip file generated!', icon="✅")


def _show_downloads():
    """Offer the zip download and the result table once a harvest has finished."""
    if 'fname_results_zip' not in st.session_state:
        return
    st.write('Harvester Results:')
    with open(FNAME_ZIP, 'rb') as f:
        st.download_button('Download Results (zip)', f, file_name=FNAME_ZIP)
    if os.path.exists(st.session_state['fname_results']):
        if st.button('Show Result Table'):
            show_results()


def main():
    """Build the page and react to the sidebar buttons.

    Renders the header and help expanders, builds the sidebar, then handles
    'Show Settings', 'Show Input Data' and 'Run Geodata-Harvester'. After a
    completed run the download button and result table option are shown.
    """
    st.set_page_config(page_title="Geodata-Harvester App",
                       layout="centered",
                       menu_items=menu_dict)
    init_header()
    with st.expander("How to use this app"):
        init_howto()
    with st.expander("Settings and Data Overview"):
        print_info()
    with st.expander("Attributions and Acknowledgements"):
        print_attr()
    # Placeholder element kept so the page layout stays as before.
    st.empty()

    button_show_settings, button_show_data, button_run = open_sidebar()

    if button_show_settings:
        print_settings(load_settings(st.session_state.get('settings_file')))

    if button_show_data:
        data_file = st.session_state.get('data_file')
        if data_file is None:
            st.text('No data file provided. Please add data file.')
        else:
            st.header("Data Table:")
            st.write(load_data(data_file))

    if button_run:
        _harvest_and_package()

    # Show options and buttons after run
    _show_downloads()


if __name__ == "__main__":
    main()
#### Debugging options | |
#st.write('Streamlit version:', st.__version__) | |
#st.write('Session state:', st.session_state) | |
#for key in st.session_state: | |
# st.write(f"{key}: {st.session_state[key]}") |