File size: 12,910 Bytes
2786463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eff7924
2786463
eff7924
2786463
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# Geodata-Harvester App
# Description: Streamlit app for the Geodata-Harvester package
# Author: Sebastian Haan

import streamlit as st
import sys
import os
import pandas as pd
import numpy as np
import yaml
import shutil
from types import SimpleNamespace
from PIL import Image
# Import the core package Geodata-Harvester
from geodata_harvester import harvest


## Limitations:
# The Google Earth Engine extension for Geodata-Harvester is not natively supported in Streamlit,
# unless you run the app locally or provide a GEE service account token.

# File name of the zip archive that is generated after a run and offered for download
FNAME_ZIP = 'harvest_collection.zip'
# Output folder: settings.yaml is written here, results.csv is expected here,
# and the whole folder is zipped after a run
OUTPATH = 'harvest_collection'

# Link to settings templates
settings_template_link = 'https://huggingface.co/spaces/SIH/geodata-harvester-app/tree/main/data/templates_settings'
# link to data template
data_template_link = 'https://huggingface.co/spaces/SIH/geodata-harvester-app/tree/main/data'
# link to github page
link_to_githubpage = 'https://sydney-informatics-hub.github.io/geodata-harvester/'
# link to settings overview
link_to_settings_overview = 'https://github.com/Sydney-Informatics-Hub/geodata-harvester/blob/main/quarto/docs/Settings_Overview.md'

# Menu entries passed to st.set_page_config(menu_items=...) in main()
menu_dict = {
    "About": "https://sydney-informatics-hub.github.io/geodata-harvester/",
    "Get help": "https://github.com/Sydney-Informatics-Hub/geodata-harvester/"}

def init_header():
    """Render the page header: title, tagline, logo image and intro paragraph."""
    st.title('Geodata-Harvester App')
    st.subheader("Jumpstart your geospatial analysis.")

    # The logo path is relative, so it resolves against the current working directory.
    logo = Image.open('assets/dataharvester_streamlit_logo_blue.jpg')
    st.image(logo)

    # The leading runs of spaces in the 2nd and 3rd fragment are part of the
    # displayed text and are kept unchanged.
    intro = (
        "The Geodata-Harvester automates geodata download and spatio-temporal processing from a large range of datasources into ready-made datasets."
        "        This application is a lightweight Streamlit wrapper crafted to work seamlessly with the Geodata-Harvester package."
        f"        For full accessibility to a comprehensive set of features and options, please visit the [Geodata-Harvester project]({link_to_githubpage})."
    )
    st.write(intro)

def init_howto():
    """Write the 'How to' heading followed by the five numbered usage steps."""
    st.subheader('How to')
    steps = (
        f'1) Prepare a settings file that lists all requested layers and spatio-temporal settings (see [templates]({settings_template_link})). For more info please see section below.',
        '2) Upload the settings file in the sidebar.',
        '3) Optional: Select csv file including Latitude and Longitude points. This will enable the Geodata-Harvester to automatically populate the table with extracted data for all given locations.',
        '4) Then click `Run Geodata-Harvester` on the top of the sidebar.',
        '5) Download and processing of all data might take a couple of minutes. Once completed, you will be able to download all generated results as zip.',
    )
    # One st.write call per step, in order
    for step in steps:
        st.write(step)

def print_attr():
    """Write the attribution heading and the acknowledgement paragraphs."""
    st.subheader('Attributions and Acknowledgements')
    paragraphs = (
        'This software was developed by the Sydney Informatics Hub, a core research facility of the University of Sydney.',
        'Acknowledgments are an important way for us to demonstrate the value we bring to your research. Your research outcomes are vital for ongoing funding of the Sydney Informatics Hub.',
        'If you make use of this software for your research project, please include the following acknowledgment:',
        '“This research was supported by the Sydney Informatics Hub, a Core Research Facility of the University of Sydney."',
    )
    # One st.write call per paragraph, in order
    for paragraph in paragraphs:
        st.write(paragraph)

def load_settings(settings_file):
    """
    Parse a YAML settings source into a settings namespace.

    Args:
        settings_file: YAML source handed to ``yaml.load`` (in this app the
            object returned by the sidebar file uploader), or None.

    Returns:
        A ``SimpleNamespace`` with one attribute per top-level YAML key, with
        ``date_min``/``date_max`` converted to strings, ``outpath`` set to
        ``OUTPATH`` and, when a locations file is stored in the session state
        under 'data_file', ``infile`` set to it. Returns None when
        ``settings_file`` is None.

    Raises:
        TypeError: if the YAML document is not a mapping.
        AttributeError: if ``date_min`` or ``date_max`` is missing.
    """
    if settings_file is None:
        return None

    # NOTE(security): the file is user-supplied. FullLoader is kept so existing
    # settings files load exactly as before; consider yaml.safe_load if no
    # settings file relies on FullLoader-only tags.
    parsed = yaml.load(settings_file, Loader=yaml.FullLoader)

    # Parse settings dictionary as namespace
    settings = SimpleNamespace(**parsed)
    # Dates are stored as strings regardless of how the YAML loader typed them
    settings.date_min = str(settings.date_min)
    settings.date_max = str(settings.date_max)

    # .get() instead of [...]: the key may not have been stored yet, in which
    # case the infile from the settings file (if any) is left untouched.
    data_file = st.session_state.get('data_file')
    if data_file is not None:
        settings.infile = data_file

    settings.outpath = OUTPATH
    return settings

def print_settings(settings):
    """
    Write every attribute of the settings namespace to the page.

    ``target_sources`` is expanded to one line per source; every other
    attribute is written as a single ``settings.<key> : <value>`` line.
    Nothing is written when ``settings`` is falsy (e.g. None).
    """
    if not settings:
        return

    st.header("Settings loaded:")
    for key, value in settings.__dict__.items():
        if key != "target_sources":
            st.write(f"settings.{key} : {value}")
            continue
        # Nested mapping: heading line first, then one line per source
        st.write(f"settings.{key}:")
        for source in value:
            st.write(f"   '{source}': {value[source]}")


def print_info():
    """
    Render the 'Settings Overview' and 'Data Overview' help sections.

    The settings overview shows an example YAML settings file as a fenced
    code block; the data overview lists the supported data sources and links
    to the data overview document of the Geodata-Harvester project.
    """
    # Local import: only this function needs it
    from textwrap import dedent

    st.subheader('Settings Overview')
    st.write('The Geodata-Harvester is controlled by a settings file in YAML format, \
        which includes all user-defined settings and data layers for extraction and processing.')
    # f-string so that the template link is actually interpolated
    st.write(f'Example settings file can be found in the [templates]({settings_template_link}).')
    st.write('The settings file includes the following sections:')

    # Example settings file. It is dedented before rendering so that the
    # indentation inside the fence is exactly the YAML nesting.
    settings_example = dedent("""
        ```yaml
        #Bounding Box as (lng_min, lat_min, lng_max, lat_max):
        #If not provided, the Geodata-Harvester will try to infer bbox from points in Locations file
        target_bbox: [149, -30, 149.9, -29]

        #Select start date:
        date_min: 2023-01-01

        #Select end date:
        date_max: 2023-02-01

        #Spatial Resolution [in arcsec]:
        target_res: 100.0

        # Number of time interval slices in given date range
        temp_intervals: 1

        #Headername of Latitude in input file (only relevant if data file provided):
        colname_lat: Lat

        #Headername of Longitude in input file (only relevant if data file provided):
        colname_lng: Long

        target_sources:
          #Satellite data from Digital Earth Australia (optional)
          DEA:
          - landsat_barest_earth

          #National Digital Elevation Model (DEM) 1 Second (optional)
          DEM:
          - DEM
          - Slope
          - Aspect

          #Landscape Data (optional)
          Landscape:
          - Relief_300m

          #Radiometric Data (optional)
          Radiometric:
          - radmap2019_grid_dose_terr_awags_rad_2019
          - radmap2019_grid_dose_terr_filtered_awags_rad_2019

          # SILO Climate Data (optional)
          # temporal aggregation options: 'mean', 'median', 'sum', 'std', 'perc95', 'perc5', 'max', 'min'
          SILO:
            max_temp:
            - Median
            min_temp:
            - Median

          #Soil data from SLGA (optional)
          SLGA:
            Bulk_Density:
            - 0-5cm
            Clay:
            - 0-5cm
        ```
        """)
    st.markdown(settings_example)

    st.subheader('Data Overview')
    # Data Overview list. The blank line after the list keeps the closing
    # paragraph from being absorbed into the last bullet.
    data_overview = dedent("""
        The following data sources are currently supported by the Geodata-Harvester Streamlit app:

        - Soil Data 3D SLGA (Australia)
        - SILO Climate Database (Australia)
        - National Digital Elevation Model incl. Slope, Aspect (Australia)
        - Digital Earth Australia Geoscience Earth Observations (incl. Sentinel, Landsat, MODIS for Australia)
        - Radiometric Data (Australia)
        - Landscape Data (Australia)

        For more information, please visit the [Geodata-Harvester Data Overview](https://github.com/Sydney-Informatics-Hub/geodata-harvester/blob/main/quarto/docs/Data_Overview.md).
        Note that Google Earth Engine is not supported in streamlit app unless you have a GEE token or run the app locally.
        """)
    st.markdown(data_overview)

def load_data(data_file):
    """
    Read a CSV source into a pandas DataFrame.

    Returns None when ``data_file`` is None; otherwise returns whatever
    ``pd.read_csv`` produces for the given path or file-like object.
    """
    if data_file is None:
        return None
    return pd.read_csv(data_file)

def show_results():
    """Load the results CSV named in the session state and write it to the page."""
    # Read first, so nothing is written if the file cannot be loaded
    results_path = st.session_state['fname_results']
    table = pd.read_csv(results_path)

    st.write('Extracted Data Table:')
    st.write(table)

def zip_folder(folder_path):
    """
    Zip the contents of ``folder_path`` into the archive named by ``FNAME_ZIP``.

    Args:
        folder_path: Directory whose contents are archived.
    """
    # make_archive appends the ".zip" suffix itself, so only the extension is
    # stripped here. splitext is used instead of split('.') so that dots
    # elsewhere in the name or path do not truncate the archive name.
    archive_base, _ = os.path.splitext(FNAME_ZIP)
    shutil.make_archive(archive_base, "zip", folder_path)

def gee_init():
    """
    Add the optional Google Earth Engine token field to the sidebar (experimental).

    A non-empty token is exported as the ``EARTHENGINE_TOKEN`` environment
    variable. While the field is empty the environment is left untouched, so
    a token that is already set in the environment is not replaced by an
    empty string on every rerun.
    """
    st.sidebar.header('Google Earth Engine (Optional)')
    st.sidebar.write('Please copy your Google Earth Engine authentication token in the field below.')
    token = st.sidebar.text_input(
        label="Enter Google Earth Engine Token:",
        type="password"
    ).strip()
    # NOTE: os.environ is process-wide, so the token is visible to everything
    # running in this Python process.
    if token:
        os.environ['EARTHENGINE_TOKEN'] = token

def open_sidebar():
    """
    Build the sidebar and report which of its buttons was clicked.

    Sections are created top to bottom: run button, settings file upload,
    locations file upload, Google Earth Engine token field. The two uploads
    are stored in the session state under 'settings_file' and 'data_file'.

    Returns:
        Tuple ``(show_settings_clicked, show_data_clicked, run_clicked)`` of
        the three button states.
    """
    sidebar = st.sidebar

    # Run section
    sidebar.header('Run Geodata-Harvester')
    sidebar.write('This will harvest geodata as specified in the settings file below.')
    run_clicked = sidebar.button('Run Geodata-Harvester')
    sidebar.divider()

    # YAML settings file upload
    sidebar.header('Settings File')
    uploaded_settings = sidebar.file_uploader(
        f"Choose a yaml settings file (see [templates]({settings_template_link}))")
    show_settings_clicked = sidebar.button('Show Settings')
    st.session_state['settings_file'] = uploaded_settings
    sidebar.divider()

    # Locations (CSV) file upload
    sidebar.header('Locations File')
    uploaded_data = sidebar.file_uploader(
        f"Upload a CSV file with Latitudes and Longitudes (see [example data csv]({data_template_link}))")
    show_data_clicked = sidebar.button('Show Input Data')
    st.session_state['data_file'] = uploaded_data
    sidebar.divider()

    # Optional Google Earth Engine token field
    gee_init()

    return show_settings_clicked, show_data_clicked, run_clicked

def run_harvester():
    """
    Run the Geodata-Harvester with the settings file stored in the session state.

    The settings are loaded, written to ``<OUTPATH>/settings.yaml`` and that
    file is passed to ``harvest.run``. When the session state holds a
    locations file path under 'data_file2', it is used as ``settings.infile``.

    Returns:
        The value returned by ``harvest.run(..., return_df=True)``, or None
        (after showing an error message) when no settings file is available.
    """
    # .get() covers both "key missing" and "value is None". The previous
    # bitwise `&` evaluated both operands and raised KeyError on a missing key.
    settings_file = st.session_state.get('settings_file')
    if settings_file is None:
        st.error('Please upload settings file.')
        return None

    settings = load_settings(settings_file)
    infile_path = st.session_state.get('data_file2')
    if infile_path is not None:
        settings.infile = infile_path

    os.makedirs(name=OUTPATH, exist_ok=True)
    # write settings to disk
    settings_path = os.path.join(OUTPATH, 'settings.yaml')
    with open(settings_path, 'w') as file:
        yaml.dump(settings.__dict__, file)

    with st.spinner('Running Geodata-Harvester...'):
        df = harvest.run(settings_path, return_df=True)
    st.success('Harvest complete!', icon="✅")
    return df



##########   Main  ###########

def main():
    """
    Build the page and react to the sidebar buttons.

    - 'Show Settings' prints the parsed settings file.
    - 'Show Input Data' prints the uploaded locations table.
    - 'Run Geodata-Harvester' runs the harvest, zips the output folder and
      records the result paths in the session state.

    Once a zip has been generated in this session, a download button and a
    'Show Result Table' button are offered.
    """
    st.set_page_config(page_title="Geodata-Harvester App",
        layout="centered",
        menu_items=menu_dict)
    init_header()
    with st.expander("How to use this app"):
        init_howto()
    with st.expander("Settings and Data Overview"):
        print_info()
    with st.expander("Attributions and Acknowledgements"):
        print_attr()
    # Placeholder element kept so the page layout stays as before
    st.empty()
    button_show_settings, button_show_data, button_run = open_sidebar()

    if button_show_settings:
        settings = load_settings(st.session_state['settings_file'])
        print_settings(settings)

    if button_show_data:
        data_file = st.session_state['data_file']
        df = load_data(data_file)
        st.header("Data Table:")
        st.write(df)

    if button_run:
        # .get() covers both "key missing" and "value is None". The previous
        # bitwise `&` evaluated both operands and raised KeyError on a missing key.
        data_file = st.session_state.get('data_file')
        if data_file is not None:
            # save the uploaded table to disk so the harvester can read it by path
            df = load_data(data_file)
            df.to_csv('temp_data.csv')
            st.session_state['data_file2'] = 'temp_data.csv'
        else:
            st.session_state['data_file2'] = None
        try:
            if st.session_state.get('settings_file') is not None:
                run_harvester()
                # zip result folder and make it available for download
                with st.spinner(f'Generating {FNAME_ZIP} file for download...'):
                    zip_folder(OUTPATH)
                    st.session_state['fname_results_zip'] = FNAME_ZIP
                    st.success('Zip file generated!', icon="✅")
                st.session_state['fname_results'] = os.path.join(OUTPATH,'results.csv')
            else:
                st.text('No settings file provided. Please add settings file.')
        finally:
            # delete temp data file, also when the run failed or never started
            if os.path.exists('temp_data.csv'):
                os.remove('temp_data.csv')

    # Show options and buttons after run. The zip is checked on disk because
    # the session flag can outlive the file.
    if 'fname_results_zip' in st.session_state and os.path.exists(FNAME_ZIP):
        st.write('Harvester Results:')
        with open(FNAME_ZIP, 'rb') as f:
            st.download_button('Download Results (zip)', f, file_name=FNAME_ZIP)
        results_path = st.session_state.get('fname_results')
        if results_path is not None and os.path.exists(results_path):
            button_show_results = st.button('Show Result Table')
            if button_show_results:
                show_results()


# Build the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


#### Debugging options
#st.write('Streamlit version:', st.__version__)
#st.write('Session state:', st.session_state)
#for key in st.session_state:
#    st.write(f"{key}: {st.session_state[key]}")