geodata-harvester-app

Sleeping

App Files Files Community

geodata-harvester-app / app.py

sebsigma

Update template links to SIH

eff7924 over 1 year ago

raw

history blame contribute delete

12.9 kB

	# Geodata-Harvester App
	# Description: Streamlit app for the Geodata-Harvester package
	# Author: Sebastian Haan

	import streamlit as st
	import sys
	import os
	import pandas as pd
	import numpy as np
	import yaml
	import shutil
	from types import SimpleNamespace
	from PIL import Image
	# Import the core package Geodata-Harvester
	from geodata_harvester import harvest


	## Limitations:
	# The Google Earth Engine extension for Geodata-Harvester is not natively supported in Streamlit,
	# unless you run the app locally or provide a GEE service account token.

	# Name of the zip archive offered for download and the folder that is zipped.
	FNAME_ZIP = "harvest_collection.zip"
	OUTPATH = "harvest_collection"

	# Settings templates hosted with the app.
	settings_template_link = "https://huggingface.co/spaces/SIH/geodata-harvester-app/tree/main/data/templates_settings"
	# Example locations data hosted with the app.
	data_template_link = "https://huggingface.co/spaces/SIH/geodata-harvester-app/tree/main/data"
	# Project documentation page.
	link_to_githubpage = "https://sydney-informatics-hub.github.io/geodata-harvester/"
	# Overview of all available settings.
	link_to_settings_overview = "https://github.com/Sydney-Informatics-Hub/geodata-harvester/blob/main/quarto/docs/Settings_Overview.md"

	# Entries passed to st.set_page_config(menu_items=...).
	menu_dict = {
	    "About": link_to_githubpage,
	    "Get help": "https://github.com/Sydney-Informatics-Hub/geodata-harvester/",
	}

	def init_header():
	    """Render the page header: title, tagline, logo and a short introduction.

	    The introduction links to the project page given by the module-level
	    ``link_to_githubpage``.
	    """
	    st.title('Geodata-Harvester App')
	    st.subheader("Jumpstart your geospatial analysis.")
	    # Image.open keeps the underlying file open; the context manager closes
	    # it as soon as the image has been handed to Streamlit.
	    with Image.open('assets/dataharvester_streamlit_logo_blue.jpg') as image:
	        st.image(image)
	    # Adjacent literals (instead of backslash continuations inside a single
	    # literal) keep source indentation out of the displayed text.
	    st.write(
	        "The Geodata-Harvester automates geodata download and spatio-temporal processing "
	        "from a large range of datasources into ready-made datasets. "
	        "This application is a lightweight Streamlit wrapper crafted to work seamlessly "
	        "with the Geodata-Harvester package. "
	        "For full accessibility to a comprehensive set of features and options, "
	        f"please visit the [Geodata-Harvester project]({link_to_githubpage})."
	    )

	def init_howto():
	    """Write the numbered usage steps below a 'How to' subheader."""
	    st.subheader('How to')
	    steps = (
	        f'1) Prepare a settings file that lists all requested layers and spatio-temporal settings (see [templates]({settings_template_link})). For more info please see section below.',
	        '2) Upload the settings file in the sidebar.',
	        '3) Optional: Select csv file including Latitude and Longitude points. This will enable the Geodata-Harvester to automatically populate the table with extracted data for all given locations.',
	        '4) Then click `Run Geodata-Harvester` on the top of the sidebar.',
	        '5) Download and processing of all data might take a couple of minutes. Once completed, you will be able to download all generated results as zip.',
	    )
	    # One st.write call per step, in order.
	    for step in steps:
	        st.write(step)

	def print_attr():
	    """Write the attribution and acknowledgement paragraphs."""
	    st.subheader('Attributions and Acknowledgements')
	    paragraphs = (
	        'This software was developed by the Sydney Informatics Hub, a core research facility of the University of Sydney.',
	        'Acknowledgments are an important way for us to demonstrate the value we bring to your research. Your research outcomes are vital for ongoing funding of the Sydney Informatics Hub.',
	        'If you make use of this software for your research project, please include the following acknowledgment:',
	        '“This research was supported by the Sydney Informatics Hub, a Core Research Facility of the University of Sydney."',
	    )
	    for paragraph in paragraphs:
	        st.write(paragraph)

	def load_settings(settings_file):
	    """Parse a YAML settings file into a settings namespace.

	    Args:
	        settings_file: File-like object (or string) holding the YAML settings,
	            or None if nothing was uploaded.

	    Returns:
	        A SimpleNamespace with one attribute per top-level YAML key, where
	        ``date_min`` and ``date_max`` are converted to strings, ``outpath`` is
	        set to OUTPATH and ``infile`` is set to the session's ``data_file``
	        when one is present. None if ``settings_file`` is None.
	    """
	    if settings_file is None:
	        return None
	    # Rewind so that a stream which has already been read once is parsed
	    # from the start again instead of yielding an empty document.
	    if hasattr(settings_file, 'seek'):
	        settings_file.seek(0)
	    # NOTE(review): the settings come from a user upload; yaml.safe_load would
	    # be the stricter choice. Kept as FullLoader here to preserve the set of
	    # accepted documents - confirm whether any template relies on it.
	    settings = yaml.load(settings_file, Loader=yaml.FullLoader)
	    # Parse settings dictionary as namespace
	    settings = SimpleNamespace(**settings)
	    # Dates are stored as plain strings.
	    settings.date_min = str(settings.date_min)
	    settings.date_max = str(settings.date_max)
	    # .get() avoids a KeyError when no locations file was registered yet.
	    data_file = st.session_state.get('data_file')
	    if data_file is not None:
	        settings.infile = data_file
	    settings.outpath = OUTPATH
	    return settings

	def print_settings(settings):
	    """Write every attribute of ``settings`` to the page; no-op if it is falsy.

	    ``target_sources`` is expanded into one line per source; all other
	    attributes are written as a single ``settings.<key> : <value>`` line.
	    """
	    if not settings:
	        return
	    st.header("Settings loaded:")
	    for name, value in vars(settings).items():
	        if name != "target_sources":
	            st.write(f"settings.{name} : {value}")
	            continue
	        st.write(f"settings.{name}:")
	        sources = settings.target_sources
	        for source in sources:
	            st.write(f" '{source}': {sources[source]}")


	def print_info():
	    """Render the 'Settings Overview' and 'Data Overview' help sections.

	    The settings overview shows an example YAML settings file as a fenced
	    code block; the data overview lists the supported data sources and links
	    to the project's data overview page.
	    """
	    st.subheader('Settings Overview')
	    st.write(
	        'The Geodata-Harvester is controlled by a settings file in YAML format, '
	        'which includes all user-defined settings and data layers for extraction and processing.'
	    )
	    # f-string so the templates link is actually interpolated.
	    st.write(f'Example settings file can be found in the [templates]({settings_template_link}).')
	    st.write('The settings file includes the following sections:')
	    # Built line by line so the YAML nesting is explicit and independent of
	    # how this source file is indented.
	    yaml_example = "\n".join((
	        "#Bounding Box as (lng_min, lat_min, lng_max, lat_max):",
	        "#If not provided, the Geodata-Harvester will try to infer bbox from points in Locations file",
	        "target_bbox: [149, -30, 149.9, -29]",
	        "",
	        "#Select start date:",
	        "date_min: 2023-01-01",
	        "",
	        "#Select end date:",
	        "date_max: 2023-02-01",
	        "",
	        "#Spatial Resolution [in arcsec]:",
	        "target_res: 100.0",
	        "",
	        "# Number of time interval slices in given date range",
	        "temp_intervals: 1",
	        "#Headername of Latitude in input file (only relevant if data file provided):",
	        "colname_lat: Lat",
	        "",
	        "#Headername of Longitude in input file (only relevant if data file provided):",
	        "colname_lng: Long",
	        "",
	        "target_sources:",
	        "  #Satellite data from Digital Earth Australia (optional)",
	        "  DEA:",
	        "  - landsat_barest_earth",
	        "",
	        "  #National Digital Elevation Model (DEM) 1 Second (optional)",
	        "  DEM:",
	        "  - DEM",
	        "  - Slope",
	        "  - Aspect",
	        "",
	        "  #Landscape Data (optional)",
	        "  Landscape:",
	        "  - Relief_300m",
	        "",
	        "  #Radiometric Data (optional)",
	        "  Radiometric:",
	        "  - radmap2019_grid_dose_terr_awags_rad_2019",
	        "  - radmap2019_grid_dose_terr_filtered_awags_rad_2019",
	        "",
	        "  # SILO Climate Data (optional)",
	        "  # temporal aggregation options: 'mean', 'median', 'sum', 'std', 'perc95', 'perc5', 'max', 'min'",
	        "  SILO:",
	        "    max_temp:",
	        "    - Median",
	        "    min_temp:",
	        "    - Median",
	        "",
	        "  #Soil data from SLGA (optional)",
	        "  SLGA:",
	        "    Bulk_Density:",
	        "    - 0-5cm",
	        "    Clay:",
	        "    - 0-5cm",
	    ))
	    # Fenced block: without the fence, markdown renders the '#' comment
	    # lines as headings and collapses the YAML layout.
	    st.markdown(f"```yaml\n{yaml_example}\n```")
	    st.subheader('Data Overview')
	    data_overview_link = 'https://github.com/Sydney-Informatics-Hub/geodata-harvester/blob/main/quarto/docs/Data_Overview.md'
	    # Data Overview list; blank lines keep the closing paragraphs out of the
	    # last bullet point.
	    st.markdown("\n".join((
	        "The following data sources are currently supported by the Geodata-Harvester Streamlit app:",
	        "",
	        "- Soil Data 3D SLGA (Australia)",
	        "- SILO Climate Database (Australia)",
	        "- National Digital Elevation Model incl. Slope, Aspect (Australia)",
	        "- Digital Earth Australia Geoscience Earth Observations (incl. Sentinel, Landsat, MODIS for Australia)",
	        "- Radiometric Data (Australia)",
	        "- Landscape Data (Australia)",
	        "",
	        f"For more information, please visit the [Geodata-Harvester Data Overview]({data_overview_link}).",
	        "",
	        "Note that Google Earth Engine is not supported in streamlit app unless you have a GEE token or run the app locally.",
	    )))

	def load_data(data_file):
	    """Load a CSV file into a pandas DataFrame.

	    Args:
	        data_file: Path or file-like object accepted by ``pd.read_csv``, or
	            None if no file was provided.

	    Returns:
	        The parsed DataFrame, or None if ``data_file`` is None.
	    """
	    if data_file is None:
	        return None
	    # A file-like object that was read before sits at end-of-file; rewind it
	    # so repeated loads of the same object return the full table.
	    if hasattr(data_file, 'seek'):
	        data_file.seek(0)
	    return pd.read_csv(data_file)

	def show_results():
	    """Read the results CSV named in the session state and display it."""
	    results_path = st.session_state['fname_results']
	    table = pd.read_csv(results_path)
	    st.write('Extracted Data Table:')
	    st.write(table)

	def zip_folder(folder_path):
	    """Zip the contents of ``folder_path`` into the archive named by FNAME_ZIP.

	    Args:
	        folder_path: Directory whose contents are archived.

	    Returns:
	        The archive file name as returned by ``shutil.make_archive``.
	    """
	    # make_archive appends the '.zip' extension itself, so strip it from the
	    # configured file name; splitext only removes the final extension.
	    archive_base, _ = os.path.splitext(FNAME_ZIP)
	    return shutil.make_archive(archive_base, "zip", folder_path)

	def gee_init():
	    """Add the optional Google Earth Engine token field to the sidebar (experimental).

	    The stripped field content is stored in the EARTHENGINE_TOKEN environment
	    variable, including when the field is empty.
	    """
	    # NOTE(review): os.environ is process-wide - confirm this is acceptable
	    # if the app serves more than one user from the same process.
	    sidebar = st.sidebar
	    sidebar.header('Google Earth Engine (Optional)')
	    sidebar.write('Please copy your Google Earth Engine authentication token in the field below.')
	    token = sidebar.text_input(
	        label="Enter Google Earth Engine Token:",
	        type="password",
	    )
	    os.environ['EARTHENGINE_TOKEN'] = token.strip()

	def open_sidebar():
	    """Build the sidebar and return the state of its three buttons.

	    The sidebar holds, top to bottom: the run button, the settings-file
	    uploader, the locations-file uploader and the optional Google Earth Engine
	    token field. Both uploaded files are stored in the session state under
	    ``settings_file`` and ``data_file``.

	    Returns:
	        Tuple ``(button_show_settings, button_show_data, button_run)``.
	    """
	    sidebar = st.sidebar

	    # Run section
	    sidebar.header('Run Geodata-Harvester')
	    sidebar.write('This will harvest geodata as specified in the settings file below.')
	    run_clicked = sidebar.button('Run Geodata-Harvester')

	    sidebar.divider()

	    # YAML settings upload
	    sidebar.header('Settings File')
	    uploaded_settings = sidebar.file_uploader(
	        f"Choose a yaml settings file (see [templates]({settings_template_link}))"
	    )
	    show_settings_clicked = sidebar.button('Show Settings')
	    st.session_state['settings_file'] = uploaded_settings

	    sidebar.divider()

	    # Locations CSV upload
	    sidebar.header('Locations File')
	    uploaded_data = sidebar.file_uploader(
	        f"Upload a CSV file with Latitudes and Longitudes (see [example data csv]({data_template_link}))"
	    )
	    show_data_clicked = sidebar.button('Show Input Data')
	    st.session_state['data_file'] = uploaded_data

	    sidebar.divider()

	    # Optional Google Earth Engine token
	    gee_init()

	    return show_settings_clicked, show_data_clicked, run_clicked

	def run_harvester():
	    """Run the Geodata-Harvester with the settings file held in the session.

	    The parsed settings are written to ``<OUTPATH>/settings.yaml`` and that
	    file is passed to ``harvest.run``. If the session holds a ``data_file2``
	    entry, it replaces ``settings.infile`` before the settings are written.

	    Returns:
	        The value returned by ``harvest.run(..., return_df=True)``, or None
	        (after showing an error) if no settings file is available.
	    """
	    # .get() covers both "key missing" and "value is None". The former
	    # `(key in state) & (state[key] is not None)` evaluated both operands,
	    # so a missing key raised KeyError despite the membership test.
	    settings_file = st.session_state.get('settings_file')
	    if settings_file is None:
	        st.error('Please upload settings file.')
	        return None

	    settings = load_settings(settings_file)
	    infile = st.session_state.get('data_file2')
	    if infile is not None:
	        settings.infile = infile

	    os.makedirs(OUTPATH, exist_ok=True)
	    # write settings to disk
	    fname_settings = os.path.join(OUTPATH, 'settings.yaml')
	    with open(fname_settings, 'w') as file:
	        yaml.dump(settings.__dict__, file)
	    with st.spinner('Running Geodata-Harvester...'):
	        df = harvest.run(fname_settings, return_df=True)
	    st.success('Harvest complete!', icon="✅")
	    return df



	########## Main ###########

	def main():
	    """Build the page and react to the sidebar buttons.

	    Layout: header, three help expanders, then the output of whichever
	    sidebar button was pressed. After a run, a download button for the zipped
	    results and, if the results table exists, a button to display it are shown.
	    """
	    st.set_page_config(
	        page_title="Geodata-Harvester App",
	        layout="centered",
	        menu_items=menu_dict,
	    )
	    init_header()
	    with st.expander("How to use this app"):
	        init_howto()
	    with st.expander("Settings and Data Overview"):
	        print_info()
	    with st.expander("Attributions and Acknowledgements"):
	        print_attr()
	    # Placeholder element; created but not written to at present.
	    workpanel = st.empty()
	    button_show_settings, button_show_data, button_run = open_sidebar()

	    if button_show_settings:
	        settings = load_settings(st.session_state['settings_file'])
	        print_settings(settings)

	    if button_show_data:
	        df = load_data(st.session_state['data_file'])
	        st.header("Data Table:")
	        st.write(df)

	    if button_run:
	        temp_csv = 'temp_data.csv'
	        # .get() replaces `(key in state) & (state[key] is not None)`: the
	        # bitwise & evaluated both sides, so it could not guard the lookup.
	        data_file = st.session_state.get('data_file')
	        settings_file = st.session_state.get('settings_file')
	        try:
	            if data_file is not None:
	                # save data so it can be read by geopandas later;
	                # index=False keeps the row index from being written as an
	                # extra unnamed column
	                df = load_data(data_file)
	                df.to_csv(temp_csv, index=False)
	                st.session_state['data_file2'] = temp_csv
	            else:
	                st.session_state['data_file2'] = None

	            if settings_file is not None:
	                run_harvester()
	                # zip result folder and make available for download
	                with st.spinner(f'Generating {FNAME_ZIP} file for download...'):
	                    zip_folder(OUTPATH)
	                st.session_state['fname_results_zip'] = FNAME_ZIP
	                st.success('Zip file generated!', icon="✅")
	                # remember where the results table is for "Show Result Table"
	                st.session_state['fname_results'] = os.path.join(OUTPATH, 'results.csv')
	            else:
	                st.text('No settings file provided. Please add settings file.')
	        finally:
	            # delete temp data file, also when the harvest raised
	            if os.path.exists(temp_csv):
	                os.remove(temp_csv)

	    # Show options and buttons after run
	    if 'fname_results_zip' in st.session_state:
	        st.write('Harvester Results:')
	        with open(FNAME_ZIP, 'rb') as f:
	            st.download_button('Download Results (zip)', f, file_name=FNAME_ZIP)
	        if os.path.exists(st.session_state['fname_results']):
	            if st.button('Show Result Table'):
	                show_results()


	# Script entry point: build the app when the file is executed directly.
	if __name__ == "__main__":
	    main()


	#### Debugging options
	#st.write('Streamlit version:', st.__version__)
	#st.write('Session state:', st.session_state)
	#for key in st.session_state:
	# st.write(f"{key}: {st.session_state[key]}")