Context - Common#

[1]:
# Put ../src first on the module search path so packages located there can be imported
import sys
sys.path.insert(0, '../src')
[2]:
import pandas as pd
from darts.datasets import EnergyDataset
[3]:
import ontime as on

Load data#

[4]:
ts = EnergyDataset().load()

Process the data#

[5]:
# Columns retained for the rest of the notebook.
cols = ['generation biomass', 'generation solar', 'generation nuclear']

# Convert to a DataFrame, interpolate missing values, then keep only the
# selected columns.
df = ts.pd_dataframe().interpolate()[cols]
[6]:
ts = on.TimeSeries.from_dataframe(df)
[7]:
# Univariate series: only the 'generation solar' component, sliced to the
# window between the timestamps '2015' and '2016'.
# NOTE(review): whether the end timestamp is inclusive depends on `slice` — confirm.
ts_uni = ts['generation solar'].slice(pd.Timestamp('2015'), pd.Timestamp('2016'))
# Multivariate series: every component of `ts`, sliced to the same window.
ts_multi = ts.slice(pd.Timestamp('2015'), pd.Timestamp('2016'))
[8]:
train, test = ts_uni.split_after(pd.Timestamp('2015-09-01'))

Load Common Context#

[9]:
from ontime.context import common

Profiler#

[10]:
profiler = common.Profiler()

Daily Aggregation#

[16]:
# Daily profile of the univariate series, aggregated with the mean.
day_mean = (
    profiler
    .profile(ts_uni, profiler.Period.DAILY, profiler.Aggregation.MEAN)
    .rename({"value": "day_mean"})
)

# Same daily profile, aggregated with the median.
day_median = (
    profiler
    .profile(ts_uni, profiler.Period.DAILY, profiler.Aggregation.MEDIAN)
    .rename({"value": "day_median"})
)
[17]:
# Overlay the daily mean and daily median profiles as two line marks.
(
    on.Plot()
    .add(on.marks.line, day_mean)
    .add(on.marks.line, day_median)
    .show()
)
[17]:

Weekly Aggregation#

[18]:
# Weekly profile of the univariate series, aggregated with the mean.
week_mean = (
    profiler
    .profile(ts_uni, profiler.Period.WEEKLY, profiler.Aggregation.MEAN)
    .rename({"value": "week_mean"})
)

# Same weekly profile, aggregated with the median.
week_median = (
    profiler
    .profile(ts_uni, profiler.Period.WEEKLY, profiler.Aggregation.MEDIAN)
    .rename({"value": "week_median"})
)
[19]:
# Overlay the weekly mean and weekly median profiles as two line marks.
(
    on.Plot()
    .add(on.marks.line, week_mean)
    .add(on.marks.line, week_median)
    .show()
)
[19]:

Generic Predictor#

[20]:
model = common.GenericPredictor()
[24]:
model.fit(train)
[24]:
<ontime.context.common.generic_predictor.GenericPredictor at 0x7fc6c9b189d0>

What does the future look like?

[25]:
pred = model.predict(48)
[26]:
# Compare the forecast with what actually happened.
(
    on.Plot()
    # Last 96 points of the training split, for context.
    .add(on.marks.line, train[-96:].rename({"generation solar": "Training set"}))
    # The 48-step forecast produced by the fitted predictor.
    .add(on.marks.line, pred.rename({"generation solar": "Prediction"}))
    # First 48 points of the test split, drawn dashed as the ground truth.
    .add(on.marks.line, test[:48].rename({"generation solar": "Truth"}), type="dashed")
    .properties(width=600, height=300)
    .show()
)
[26]:

Generic Detector#

[27]:
model = common.GenericDetector()
[28]:
model.fit(train)
[28]:
<ontime.context.common.generic_detector.GenericDetector at 0x7fc6c9a32ef0>

Does the current signal have a problem?

[29]:
detected_test = model.detect(test)
[30]:
# Show the first 72 points of the test split with the detector output on top.
(
    on.Plot(test[:72])
    # Line mark for the plot's own data (no explicit data passed).
    .add(on.marks.line)
    # Detection result for the same 72 points, drawn as dots.
    .add(on.marks.mark, data=detected_test[:72].rename({"generation solar": "Anomalies"}), type="dot")
    .properties(width=600, height=300)
    .show()
)
[30]:

What if we want an idea of the problems that may occur in the future?

[31]:
predetected = model.predetect(72)
[32]:
# Show the first 72 points of the test split with the pre-detection output on top.
(
    on.Plot(test[:72])
    # Line mark for the plot's own data (no explicit data passed).
    .add(on.marks.line)
    # Result of `model.predetect(72)`, drawn as dots.
    .add(on.marks.mark, data=predetected[:72].rename({"generation solar": "Anomalies"}), type="dot")
    .properties(width=600, height=300)
    .show()
)
[32]:

Data Quality Detector#

Detect values above an absolute threshold

[33]:
# Detector configured with an absolute upper threshold of 3000.
detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000
)

# Fit on the training split so the detector never sees the data it is
# evaluated on.
detector.fit(train)

# Detect on the first 72 points of the held-out test split.
detector.detect(test[:72]).plot()
[33]:

Or between two thresholds

[34]:
# Detector configured with absolute bounds: lower 1000, upper 3000.
detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000,
    lower_threshold=1000
)

# Fit on the training split so the detector never sees the data it is
# evaluated on.
detector.fit(train)

# Detect on the first 72 points of the held-out test split.
detector.detect(test[:72]).plot()
[34]:

Or within a statistical range

[35]:
# Detector configured with a quantile-type upper threshold of 0.8.
detector = common.DataQualityDetector(
    threshold_type='quantile',
    upper_threshold=0.8
)

# Fit on the training split: a statistical threshold should be learned from
# data other than the data being checked, otherwise the evaluation leaks.
detector.fit(train)

# Detect on the first 72 points of the held-out test split.
detector.detect(test[:72]).plot()
[35]:

Missing Data Detector#

Creating data with NaNs

[36]:
import numpy as np

def add_random_nans(series, n=1, seed=None):
    """
    Return a copy of a pandas Series with NaN written at random positions.

    The input Series is never modified. Positions are chosen without
    replacement and by position rather than by label, so exactly ``n``
    entries become NaN even when the index contains duplicate labels.

    Parameters:
    - series (pd.Series): The pandas Series to take values from.
    - n (int): The number of NaN values to add. Default is 1. Values larger
      than ``len(series)`` are clipped to ``len(series)``.
    - seed (int | None): Seed for a dedicated random generator, giving a
      reproducible result. When None (default), the global ``np.random``
      state is used, so ``np.random.seed(...)`` still applies.

    Returns:
    - pd.Series: A new pandas Series with NaN values inserted.

    Raises:
    - ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    n = min(n, len(series))
    result = series.copy()  # leave the caller's data untouched
    if n == 0:
        return result

    # Both the legacy module-level API and a Generator expose a compatible `choice`.
    rng = np.random if seed is None else np.random.default_rng(seed)
    positions = rng.choice(len(result), size=n, replace=False)
    result.iloc[positions] = np.nan
    return result
[37]:
# Ask for 300 randomly placed NaNs in the test split (as a pandas Series),
# then wrap the result back into a TimeSeries.
ts_w_nans = on.TimeSeries.from_series(add_random_nans(test.pd_series(), 300))

Detecting the NaNs

[38]:
# Detector built with default arguments; no fit step is called before detecting.
detector = common.MissingDataDetector()

# Run detection on the first 72 points of the NaN-injected series and plot the result.
detector.detect(ts_w_nans[:72]).plot()
[38]: