# Page header captured with this file (not code): Zack | chore: Fix clobbered code | 1f82d2d | raw | history blame | 4.12 kB
import gradio as gr
from huggingface_hub import from_pretrained_keras
import pandas as pd
import numpy as np
import json
from matplotlib import pyplot as plt
# Load the scaling statistics and threshold used by the functions below
# (the "mean", "std" and "threshold" keys are read from this mapping).
# The context manager closes the file handle as soon as the JSON is parsed;
# the name `f` stays bound at module level, as before.
with open('scaler.json', encoding="utf-8") as f:
    scaler = json.load(f)

# Number of consecutive samples in each sequence window.
TIME_STEPS = 288
# Build the overlapping windows that are fed to the model.
def create_sequences(values, time_steps=TIME_STEPS):
    """Stack every contiguous window of ``time_steps`` items from ``values``.

    Args:
        values: Sliceable sequence (supports ``len`` and ``values[a:b]``).
        time_steps: Length of each window; defaults to ``TIME_STEPS``.

    Returns:
        ``np.stack`` of the ``len(values) - time_steps + 1`` windows, in
        order of their starting position.

    Raises:
        ValueError: From ``np.stack`` when there are no windows to stack,
            i.e. ``values`` is shorter than ``time_steps``.
    """
    window_count = len(values) - time_steps + 1
    windows = [
        values[start : start + time_steps] for start in range(window_count)
    ]
    return np.stack(windows)
def normalize_data(data):
    """Standardize ``data`` with the statistics stored in ``scaler``.

    Args:
        data: Object supporting arithmetic with ``scaler["mean"]`` and
            ``scaler["std"]`` (called with a DataFrame by ``master``).

    Returns:
        ``(data - scaler["mean"]) / scaler["std"]``.
    """
    centered = data - scaler["mean"]
    return centered / scaler["std"]
def plot_test_data(df_test_value):
    """Plot the input series on a new 12x6 figure.

    Args:
        df_test_value: Object exposing a pandas-style ``plot(legend=, ax=)``
            method.

    Returns:
        The newly created matplotlib figure.
    """
    figure, axes = plt.subplots(figsize=(12, 6))
    df_test_value.plot(ax=axes, legend=False)

    # Labels are set after plotting so they override anything plot() wrote.
    axes.set_title("Input Test Data")
    axes.set_ylabel("Value")
    axes.set_xlabel("Time")
    return figure
def get_anomalies(df_test_value):
    """Flag the windows whose reconstruction error exceeds the threshold.

    Args:
        df_test_value: Object whose ``.values`` can be windowed by
            ``create_sequences`` (called with the normalized DataFrame).

    Returns:
        Boolean array: ``True`` where the flattened mean absolute error
        between the model's prediction and the input window is greater than
        ``scaler["threshold"]``.
    """
    # Slice the series into overlapping windows before loading the model.
    windows = create_sequences(df_test_value.values)

    autoencoder = from_pretrained_keras("keras-io/timeseries-anomaly-detection")
    reconstruction = autoencoder.predict(windows)

    # Mean absolute error along axis 1, flattened to one dimension.
    # NOTE(review): presumably axis 1 is the time-step axis, giving one loss
    # per window for single-column input -- confirm against the model.
    mae_loss = np.abs(reconstruction - windows).mean(axis=1).reshape(-1)

    return mae_loss > scaler["threshold"]
def plot_anomalies(df_test_value, data, anomalies):
    """Plot ``data`` and overlay the points flagged as anomalous in red.

    Args:
        df_test_value: Normalized series; only its length is used here.
        data: DataFrame to draw; flagged rows are selected with ``iloc``.
        anomalies: Boolean array of per-window flags, as returned by
            ``get_anomalies``.

    Returns:
        The newly created matplotlib figure.
    """
    # Point i is flagged when every window flag in
    # anomalies[i - TIME_STEPS + 1 : i] (end exclusive) is set.
    anomalous_data_indices = [
        data_idx
        for data_idx in range(TIME_STEPS - 1, len(df_test_value) - TIME_STEPS + 1)
        if np.all(anomalies[data_idx - TIME_STEPS + 1 : data_idx])
    ]
    df_subset = data.iloc[anomalous_data_indices]

    fig, ax = plt.subplots(figsize=(12, 6))
    data.plot(legend=False, ax=ax)

    # With no flagged points the subset has zero rows. Skip the overlay:
    # there is nothing to draw, and depending on the pandas version plotting
    # an empty frame can raise instead of being a no-op.
    if not df_subset.empty:
        df_subset.plot(legend=False, ax=ax, color="r")

    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Anomalous Data Points")
    return fig
def clean_data(df):
    """Return a frame with a datetime ``timestamp`` column and a ``value`` column.

    Two input layouts are accepted:

    * ``timestamp`` and ``value`` columns already present: ``timestamp`` is
      parsed with ``pd.to_datetime``; every other column is kept unchanged.
    * ``Date``, ``Hour`` and ``Hourly_Labor_Hours_Total`` columns: the date
      plus the integer hour offset becomes ``timestamp`` and the labor
      column is returned as ``value``; all other columns are dropped.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: DataFrame in one of the two layouts above.

    Returns:
        A new DataFrame as described above, carrying the index of ``df``.

    Raises:
        ValueError: If ``df`` matches neither layout.
    """
    columns = df.columns

    # Layout 1: columns already have the expected names.
    if "timestamp" in columns and "value" in columns:
        return df.assign(timestamp=pd.to_datetime(df["timestamp"]))

    # Layout 2: separate date and hour columns that must be combined.
    if {"Date", "Hour", "Hourly_Labor_Hours_Total"}.issubset(columns):
        # Adding the hour as a timedelta already rolls an hour of 24 over to
        # 00:00 of the following day, so no separate hour-24 handling is needed.
        timestamps = pd.to_datetime(df["Date"]) + pd.to_timedelta(
            df["Hour"].astype(int), unit='h'
        )
        return pd.DataFrame(
            {
                "timestamp": timestamps.dt.floor('h'),
                "value": df["Hourly_Labor_Hours_Total"],
            }
        )

    raise ValueError("Dataframe does not contain necessary columns.")
def master(file):
    """Run the anomaly-detection pipeline on an uploaded CSV file.

    Steps: read the CSV, normalize its layout with ``clean_data``, index it
    by timestamp, standardize it, score it with the model and plot the
    flagged points.

    Args:
        file: Object whose ``name`` attribute is the path of the CSV to read.

    Returns:
        The figure produced by ``plot_anomalies``.

    Raises:
        ValueError: If ``clean_data`` rejects the columns, or if the file has
            fewer than ``TIME_STEPS`` rows.
    """
    data = clean_data(pd.read_csv(file.name))

    # Ensure the timestamp column is datetime, then use it as the index.
    # assign/set_index return new frames, so nothing is modified in place.
    data = data.assign(timestamp=pd.to_datetime(data["timestamp"])).set_index(
        "timestamp"
    )

    # At least one full window is required. Raise rather than return the
    # message: the interface's output is an image, so a returned string would
    # not be shown as text.
    if len(data) < TIME_STEPS:
        raise ValueError(
            "Not enough data to create sequences. Need at least {} records.".format(TIME_STEPS)
        )

    df_test_value = normalize_data(data)

    # The input-data plot was previously built here and discarded, leaving an
    # unclosed figure behind on every call; only the anomaly plot is needed.
    anomalies = get_anomalies(df_test_value)
    return plot_anomalies(df_test_value, data, anomalies)
# Gradio wiring: an uploaded CSV file is passed to `master`, and whatever it
# returns is sent to a single image output.
outputs = gr.outputs.Image()

iface = gr.Interface(
    title="Timeseries Anomaly Detection Using an Autoencoder",
    description="Anomaly detection of timeseries data.",
    fn=master,
    inputs=gr.inputs.File(label="CSV File"),
    outputs=outputs,
    # Sample CSV files offered as examples in the UI.
    examples=["art_daily_jumpsup.csv", "labor_hourly_short.csv"],
)

# Start the app.
iface.launch()