Context - Common#

[1]:

# Add ../src to the module search path so the local Python package can be imported
import sys
sys.path.insert(0, '../src')

[2]:

import pandas as pd
from darts.datasets import EnergyDataset

[3]:

import ontime as on

Load data#

[4]:

ts = EnergyDataset().load()

Process the data#

[5]:

# Convert the loaded darts series into a pandas DataFrame.
df = ts.pd_dataframe()
# Fill missing values using pandas' default interpolation settings.
df = df.interpolate()
# Restrict the frame to the three generation columns listed here.
cols = ['generation biomass', 'generation solar', 'generation nuclear']
df = df[cols]

[6]:

ts = on.TimeSeries.from_dataframe(df)

[7]:

ts_uni = ts['generation solar'].slice(pd.Timestamp('2015'), pd.Timestamp('2016'))
ts_multi = ts.slice(pd.Timestamp('2015'), pd.Timestamp('2016'))

[8]:

train, test = ts_uni.split_after(pd.Timestamp('2015-09-01'))

Load Common Context#

[9]:

from ontime.context import common

Profiler#

[10]:

profiler = common.Profiler()

Daily Aggregation#

[16]:

day_mean = profiler.profile(ts_uni, profiler.Period.DAILY, profiler.Aggregation.MEAN).rename({"value": "day_mean"})
day_median = profiler.profile(ts_uni, profiler.Period.DAILY, profiler.Aggregation.MEDIAN).rename({"value": "day_median"})

[17]:

(
    on.Plot()
    .add(on.marks.line, day_mean)
    .add(on.marks.line, day_median)
    .show()
)

[17]:

Weekly Aggregation#

[18]:

week_mean = profiler.profile(ts_uni, profiler.Period.WEEKLY, profiler.Aggregation.MEAN).rename({"value": "week_mean"})
week_median = profiler.profile(ts_uni, profiler.Period.WEEKLY, profiler.Aggregation.MEDIAN).rename({"value": "week_median"})

[19]:

(
    on.Plot()
    .add(on.marks.line, week_mean)
    .add(on.marks.line, week_median)
    .show()
)

[19]:

Generic Predictor#

[20]:

model = common.GenericPredictor()

[24]:

model.fit(train)

[24]:

<ontime.context.common.generic_predictor.GenericPredictor at 0x7fc6c9b189d0>

What does the future look like?

[25]:

pred = model.predict(48)

[26]:

(
    on.Plot()
    .add(on.marks.line, train[-96:].rename({"generation solar": "Training set"}))
    .add(on.marks.line, pred.rename({"generation solar": "Prediction"}))
    .add(on.marks.line, test[:48].rename({"generation solar": "Truth"}), type="dashed")
    .properties(width=600, height=300)
    .show()
)

[26]:

Generic Detector#

[27]:

model = common.GenericDetector()

[28]:

model.fit(train)

[28]:

<ontime.context.common.generic_detector.GenericDetector at 0x7fc6c9a32ef0>

Does the current signal have a problem?

[29]:

detected_test = model.detect(test)

[30]:

(
    on.Plot(test[:72])
    .add(on.marks.line)
    .add(on.marks.mark, data=detected_test[:72].rename({"generation solar": "Anomalies"}), type="dot")
    .properties(width=600, height=300)
    .show()
)

[30]:

What if we want to get an idea of future problems?

[31]:

predetected = model.predetect(72)

[32]:

(
    on.Plot(test[:72])
    .add(on.marks.line)
    .add(on.marks.mark, data=predetected[:72].rename({"generation solar": "Anomalies"}), type="dot")
    .properties(width=600, height=300)
    .show()
)

[32]:

Data Quality Detector#

Detect values above an absolute threshold

[33]:

detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000
)

# Fitting on some data
detector.fit(test)

# Detecting on a subset of it (the first 72 points)
detector.detect(test[:72]).plot()

[33]:

Or between two thresholds

[34]:

detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000,
    lower_threshold=1000
)

# Fitting on some data
detector.fit(test)

# Detecting on a subset of it (the first 72 points)
detector.detect(test[:72]).plot()

[34]:

Or within a statistical range

[35]:

detector = common.DataQualityDetector(
    threshold_type='quantile',
    upper_threshold=0.8
)

# Fitting on some data
detector.fit(test)

# Detecting on a subset of it (the first 72 points)
detector.detect(test[:72]).plot()

[35]:

Missing Data Detector#

Creating data with NaNs

[36]:

import numpy as np

def add_random_nans(series: pd.Series, n: int = 1) -> pd.Series:
    """
    Return a copy of a pandas Series with randomly placed NaN values.

    The input Series is left untouched. Positions are drawn without
    replacement, so ``min(n, len(series))`` distinct entries are set to
    NaN, even when the index contains duplicate labels.

    Parameters:
    - series (pd.Series): The pandas Series to take values from.
    - n (int): The number of NaN values to add. Default is 1.

    Returns:
    - pd.Series: A new pandas Series with NaN values inserted.

    Raises:
    - ValueError: If n is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    n = min(n, len(series))

    # Work on a copy so the caller's data is never modified in place.
    result = series.copy()
    if n == 0:
        return result

    # Sample integer positions rather than index labels: label-based
    # assignment would hit every row sharing a duplicated label and is
    # ambiguous for integer indexes.
    nan_positions = np.random.choice(len(result), size=n, replace=False)
    result.iloc[nan_positions] = np.nan
    return result

[37]:

ts_w_nans = on.TimeSeries.from_series(add_random_nans(test.pd_series(), 300))

Detecting the NaNs

[38]:

detector = common.MissingDataDetector()

detector.detect(ts_w_nans[:72]).plot()

[38]: