personalized-forecasting

Sleeping

File size: 17,344 Bytes

f3f15dd

from time import time

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from datasetsforecast.losses import rmse, mae, smape, mse, mape
from st_aggrid import AgGrid

from src.nf import MODELS, forecast_pretrained_model
from src.model_descriptions import model_cards

DATASETS = {
    "Electricity (Ercot COAST)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/ercot_COAST.csv",
    #"Electriciy (ERCOT, multiple markets)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/ercot_multiple_ts.csv",
    "Web Traffic (Peyton Manning)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/peyton_manning.csv",
    "Demand (AirPassengers)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/air_passengers.csv",
    "Finance (Exchange USD-EUR)": "https://raw.githubusercontent.com/Nixtla/transfer-learning-time-series/main/datasets/usdeur.csv",
}


@st.cache_data
def convert_df(df):
    # IMPORTANT: Cache the conversion to prevent computation on every rerun
    return df.to_csv(index=False).encode("utf-8")


def plot(df, uid, df_forecast, model):
    figs = []
    figs += [
        go.Scatter(
            x=df["ds"],
            y=df["y"],
            mode="lines",
            marker=dict(color="#236796"),
            legendrank=1,
            name=uid,
        ),
    ]
    if df_forecast is not None:
        ds_f = df_forecast["ds"].to_list()
        lo = df_forecast["forecast_lo_90"].to_list()
        hi = df_forecast["forecast_hi_90"].to_list()
        figs += [
            go.Scatter(
                x=ds_f + ds_f[::-1],
                y=hi + lo[::-1],
                fill="toself",
                fillcolor="#E7C4C0",
                mode="lines",
                line=dict(color="#E7C4C0"),
                name="Prediction Intervals (90%)",
                legendrank=5,
                opacity=0.5,
                hoverinfo="skip",
            ),
            go.Scatter(
                x=ds_f,
                y=df_forecast["forecast"],
                mode="lines",
                legendrank=4,
                marker=dict(color="#E7C4C0"),
                name=f"Forecast {uid}",
            ),
        ]
    fig = go.Figure(figs)
    fig.update_layout(
        {"plot_bgcolor": "rgba(0, 0, 0, 0)", "paper_bgcolor": "rgba(0, 0, 0, 0)"}
    )
    fig.update_layout(
        title=f"Forecasts for {uid} using Transfer Learning (from {model})",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        margin=dict(l=20, b=20),
        xaxis=dict(rangeslider=dict(visible=True)),
    )
    initial_range = [df.tail(200)["ds"].iloc[0], ds_f[-1]]
    fig["layout"]["xaxis"].update(range=initial_range)
    return fig


def st_transfer_learning():
    st.set_page_config(
        page_title="Time Series Visualization",
        page_icon="🔮",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    st.title(
        "Transfer Learning: Revolutionizing Time Series by Nixtla"
    )
    st.write(
        "<style>div.block-container{padding-top:2rem;}</style>", unsafe_allow_html=True
    )

    intro = """

        The success of startups like Open AI and Stability highlights the potential for transfer learning (TL) techniques to have a similar impact on the field of time series forecasting.



        TL can achieve lightning-fast predictions with a fraction of the computational cost by pre-training a flexible model on a large dataset and then using it on another dataset with little to no additional training.



        In this live demo, you can use pre-trained models by Nixtla (trained on the M4 dataset) to predict your own datasets. You can also see how the models perform on unseen example datasets.

	"""
    st.write(intro)

    required_cols = ["ds", "y"]

    with st.sidebar.expander("Dataset", expanded=False):
        data_selection = st.selectbox("Select example dataset", DATASETS.keys())
        data_url = DATASETS[data_selection]
        url_json = st.text_input("Data (you can pass your own url here)", data_url)
        st.write(
            "You can also upload a CSV file like [this one](https://github.com/Nixtla/transfer-learning-time-series/blob/main/datasets/air_passengers.csv)."
        )

        uploaded_file = st.file_uploader("Upload CSV")
        with st.form("Data"):

            if uploaded_file is not None:
                df = pd.read_csv(uploaded_file)
                cols = df.columns
                timestamp_col = st.selectbox("Timestamp column", options=cols)
                value_col = st.selectbox("Value column", options=cols)
            else:
                timestamp_col = st.text_input("Timestamp column", value="timestamp")
                value_col = st.text_input("Value column", value="value")
            st.write("You must press Submit each time you want to forecast.")
            submitted = st.form_submit_button("Submit")
            if submitted:
                if uploaded_file is None:
                    st.write("Please provide a dataframe.")
                    if url_json.endswith("json"):
                        df = pd.read_json(url_json)
                    else:
                        df = pd.read_csv(url_json)
                    df = df.rename(
                        columns=dict(zip([timestamp_col, value_col], required_cols))
                    )
                else:
                    # df = pd.read_csv(uploaded_file)
                    df = df.rename(
                        columns=dict(zip([timestamp_col, value_col], required_cols))
                    )
            else:
                if url_json.endswith("json"):
                    df = pd.read_json(url_json)
                else:
                    df = pd.read_csv(url_json)
                cols = df.columns
                if "unique_id" in cols:
                    cols = cols[-2:]
                df = df.rename(columns=dict(zip(cols, required_cols)))

            if "unique_id" not in df:
                df.insert(0, "unique_id", "ts_0")

            df["ds"] = pd.to_datetime(df["ds"])
            df = df.sort_values(["unique_id", "ds"])

    with st.sidebar:
        st.write("Define the pretrained model you want to use to forecast your data")
        model_name = st.selectbox("Select your model", tuple(MODELS.keys()))
        model_file = MODELS[model_name]["model"]
        st.write("Choose how many steps you want to forecast")
        fh = st.number_input("Forecast horizon", value=18)
        st.write(
            "Choose for how many steps the pretrained model will be updated using your data (use 0 for fast computation)"
        )
        max_steps = st.number_input("N-shot inference", value=0)

    # tabs
    tab_fcst, tab_cv, tab_docs, tab_nixtla = st.tabs(
        [
            "📈 Forecast",
            "🔎 Cross Validation",
            "📚 Documentation",
            "🔮 Nixtlaverse",
        ]
    )

    uids = df["unique_id"].unique()
    fcst_cols = ["forecast_lo_90", "forecast", "forecast_hi_90"]

    with tab_fcst:
        uid = uids[0]#st.selectbox("Dataset", options=uids)
        col1, col2 = st.columns([2, 4])
        with col1:
            tab_insample, tab_forecast = st.tabs(
                ["Modify input data", "Modify forecasts"]
            )
            with tab_insample:
                df_grid = df.query("unique_id == @uid").drop(columns="unique_id")
                grid_table = AgGrid(
                    df_grid,
                    editable=True,
                    theme="streamlit",
                    fit_columns_on_grid_load=True,
                    height=360,
                )
                df.loc[df["unique_id"] == uid, "y"] = (
                    grid_table["data"].sort_values("ds")["y"].values
                )
                # forecast code
                init = time()
                df_forecast = forecast_pretrained_model(df, model_file, fh, max_steps)
                end = time()
                df_forecast = df_forecast.rename(
                    columns=dict(zip(["y_5", "y_50", "y_95"], fcst_cols))
                )
            with tab_forecast:
                df_fcst_grid = df_forecast.query("unique_id == @uid").filter(
                    ["ds", "forecast"]
                )
                grid_fcst_table = AgGrid(
                    df_fcst_grid,
                    editable=True,
                    theme="streamlit",
                    fit_columns_on_grid_load=True,
                    height=360,
                )
                changes = (
                    df_forecast.query("unique_id == @uid")["forecast"].values
                    - grid_fcst_table["data"].sort_values("ds")["forecast"].values
                )
                for col in fcst_cols:
                    df_forecast.loc[df_forecast["unique_id"] == uid, col] = (
                        df_forecast.loc[df_forecast["unique_id"] == uid, col] - changes
                    )
        with col2:
            st.plotly_chart(
                plot(
                    df.query("unique_id == @uid"),
                    uid,
                    df_forecast.query("unique_id == @uid"),
                    model_name,
                ),
                use_container_width=True,
            )
            st.success(f'Done! Approximate inference time CPU: {0.7*(end-init):.2f} seconds.')

    with tab_cv:
        col_uid, col_n_windows = st.columns(2)
        uid = uids[0]
        #with col_uid:
        #    uid = st.selectbox("Time series to analyse", options=uids, key="uid_cv")
        with col_n_windows:
            n_windows = st.number_input("Cross validation windows", value=1)
        df_forecast = []
        for i_window in range(n_windows, 0, -1):
            test = df.groupby("unique_id").tail(i_window * fh)
            df_forecast_w = forecast_pretrained_model(
                df.drop(test.index), model_file, fh, max_steps
            )
            df_forecast_w = df_forecast_w.rename(
                columns=dict(zip(["y_5", "y_50", "y_95"], fcst_cols))
            )
            df_forecast_w.insert(2, "window", i_window)
            df_forecast.append(df_forecast_w)
        df_forecast = pd.concat(df_forecast)
        df_forecast["ds"] = pd.to_datetime(df_forecast["ds"])
        df_forecast = df_forecast.merge(df, how="left", on=["unique_id", "ds"])
        metrics = [mae, mape, rmse, smape]
        evaluation = df_forecast.groupby(["unique_id", "window"]).apply(
            lambda df: [f'{fn(df["y"].values, df["forecast"]):.2f}' for fn in metrics]
        )
        evaluation = evaluation.rename("eval").reset_index()
        evaluation["eval"] = evaluation["eval"].str.join(",")
        evaluation[["MAE", "MAPE", "RMSE", "sMAPE"]] = evaluation["eval"].str.split(
            ",", expand=True
        )
        col_eval, col_plot = st.columns([2, 4])
        with col_eval:
            st.write("Evaluation metrics for each cross validation window")
            st.dataframe(
                evaluation.query("unique_id == @uid")
                .drop(columns=["unique_id", "eval"])
                .set_index("window")
            )
        with col_plot:
            st.plotly_chart(
                plot(
                    df.query("unique_id == @uid"),
                    uid,
                    df_forecast.query("unique_id == @uid").drop(columns="y"),
                    model_name,
                ),
                use_container_width=True,
            )
    with tab_docs:
        tab_transfer, tab_desc, tab_ref = st.tabs(
            [
                "🚀 Transfer Learning",
                "🔎 Description of the model",
                "📚 References",
            ]
        )

        with tab_desc:
            model_card_name = MODELS[model_name]["card"]
            st.subheader("Abstract")
            st.write(f"""{model_cards[model_card_name]['Abstract']}""")
            st.subheader("Intended use")
            st.write(f"""{model_cards[model_card_name]['Intended use']}""")
            st.subheader("Secondary use")
            st.write(f"""{model_cards[model_card_name]['Secondary use']}""")
            st.subheader("Limitations")
            st.write(f"""{model_cards[model_card_name]['Limitations']}""")
            st.subheader("Training data")
            st.write(f"""{model_cards[model_card_name]['Training data']}""")
            st.subheader("BibTex/Citation Info")
            st.code(f"""{model_cards[model_card_name]['Citation Info']}""")

        with tab_transfer:
            transfer_text = """

                Transfer learning refers to the process of pre-training a flexible model on a large dataset and using it later on other data with little to no training. It is one of the most outstanding 🚀 achievements in Machine Learning 🧠 and has many practical applications.



                For time series forecasting, the technique allows you to get lightning-fast predictions ⚡ bypassing the tradeoff between accuracy and speed.



                [This notebook](https://colab.research.google.com/drive/1uFCO2UBpH-5l2fk3KmxfU0oupsOC6v2n?authuser=0&pli=1#cell-5=) shows how to generate a pre-trained model and store it in a checkpoint to make it available for public use to forecast new time series never seen by the model. 

                **You can contribute with your pre-trained models by following [this Notebook](https://github.com/Nixtla/transfer-learning-time-series/blob/main/nbs/Transfer_Learning.ipynb) and sending us an email at federico[at]nixtla.io**



                You can also take a look at list of pretrained models here.  Currently we have this ones avaiable in our [API](https://docs.nixtla.io/reference/neural_transfer_neural_transfer_post) or [Demo](http://nixtla.io/transfer-learning/). You can also download the `.ckpt`:

                - [Pretrained N-HiTS M4 Hourly](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nhits_m4_hourly.ckpt)

                - [Pretrained N-HiTS M4 Hourly (Tiny)](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nhits_m4_hourly_tiny.ckpt)

                - [Pretrained N-HiTS M4 Daily](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nhits_m4_daily.ckpt)

                - [Pretrained N-HiTS M4 Monthly](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nhits_m4_monthly.ckpt)

                - [Pretrained N-HiTS M4 Yearly](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nhits_m4_yearly.ckpt)

                - [Pretrained N-BEATS M4 Hourly](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nbeats_m4_hourly.ckpt)

                - [Pretrained N-BEATS M4 Daily](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nbeats_m4_daily.ckpt)

                - [Pretrained N-BEATS M4 Weekly](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nbeats_m4_weekly.ckpt)

                - [Pretrained N-BEATS M4 Monthly](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nbeats_m4_monthly.ckpt)

                - [Pretrained N-BEATS M4 Yearly](https://nixtla-public.s3.amazonaws.com/transfer/pretrained_models/nbeats_m4_yearly.ckpt)

                """
            st.write(transfer_text)

        with tab_ref:
            ref_text = """

                If you are interested in the transfer learning literature applied to time series forecasting, take a look at these papers:

                - [Meta-learning framework with applications to zero-shot time-series forecasting](https://arxiv.org/abs/2002.02887)

                - [N-HiTS: Neural Hierarchical Interpolation for Time Series Forecasting](https://arxiv.org/abs/2201.12886)

            """
            st.write(ref_text)

    with tab_nixtla:
        nixtla_text = """

            Nixtla is a startup that is building forecasting software for Data Scientists and Devs.



            We have been developing different open source libraries for machine learning, statistical and deep learning forecasting.



            In our [GitHub repo](https://github.com/Nixtla), you can find the projects that support this APP.

        """
        st.write(nixtla_text)
        st.image(
            "https://files.readme.io/168cdb2-Screen_Shot_2022-09-30_at_10.40.09.png",
            width=800,
        )

    with st.sidebar:
        st.download_button(
            label="Download historical data as CSV",
            data=convert_df(df),
            file_name="history.csv",
            mime="text/csv",
        )
        st.download_button(
            label="Download forecasts as CSV",
            data=convert_df(df_forecast),
            file_name="forecasts.csv",
            mime="text/csv",
        )


if __name__ == "__main__":
    st_transfer_learning()