Context - Common#
[1]:
# Put the '../src' directory first on the module search path so that
# packages located there can be imported from this notebook.
import sys
sys.path.insert(0, '../src')
[2]:
import pandas as pd
from darts.datasets import EnergyDataset
[3]:
import ontime as on
Load data#
[4]:
# Load the EnergyDataset provided by darts.datasets.
ts = EnergyDataset().load()
Process the data#
[5]:
# Keep three generation columns of the dataset, after filling gaps by
# interpolation on the full frame.
cols = ['generation biomass', 'generation solar', 'generation nuclear']
df = ts.pd_dataframe().interpolate()[cols]
[6]:
# Build an ontime TimeSeries from the DataFrame (rebinds `ts`).
ts = on.TimeSeries.from_dataframe(df)
[7]:
# Cut both the univariate (solar only) and the multivariate series to the
# window delimited by the '2015' and '2016' timestamps.
start, end = pd.Timestamp('2015'), pd.Timestamp('2016')
ts_uni = ts['generation solar'].slice(start, end)
ts_multi = ts.slice(start, end)
[8]:
# Split the univariate series into a train and a test part at 2015-09-01
# (presumably the split timestamp itself ends up in `train` - confirm
# against the split_after documentation).
train, test = ts_uni.split_after(pd.Timestamp('2015-09-01'))
Load Common Context#
[9]:
from ontime.context import common
Profiler#
[10]:
# Instantiate the Profiler from the common context.
profiler = common.Profiler()
Daily Aggregation#
[16]:
# Daily profiles of the univariate series, aggregated once by mean and once
# by median; the "value" component is renamed so each profile is labelled.
daily = profiler.Period.DAILY
day_mean = profiler.profile(ts_uni, daily, profiler.Aggregation.MEAN)
day_mean = day_mean.rename({"value": "day_mean"})
day_median = profiler.profile(ts_uni, daily, profiler.Aggregation.MEDIAN)
day_median = day_median.rename({"value": "day_median"})
[17]:
# Overlay the daily mean and median profiles as line marks on one plot.
daily_plot = on.Plot()
for profile in (day_mean, day_median):
    daily_plot = daily_plot.add(on.marks.line, profile)
daily_plot.show()
[17]:
Weekly Aggregation#
[18]:
# Weekly profiles of the univariate series, aggregated once by mean and once
# by median; the "value" component is renamed so each profile is labelled.
weekly = profiler.Period.WEEKLY
week_mean = profiler.profile(ts_uni, weekly, profiler.Aggregation.MEAN)
week_mean = week_mean.rename({"value": "week_mean"})
week_median = profiler.profile(ts_uni, weekly, profiler.Aggregation.MEDIAN)
week_median = week_median.rename({"value": "week_median"})
[19]:
# Overlay the weekly mean and median profiles as line marks on one plot.
weekly_plot = on.Plot()
for profile in (week_mean, week_median):
    weekly_plot = weekly_plot.add(on.marks.line, profile)
weekly_plot.show()
[19]:
Generic Predictor#
[20]:
# Instantiate the GenericPredictor from the common context.
model = common.GenericPredictor()
[24]:
# Fit the predictor on the training split.
model.fit(train)
[24]:
<ontime.context.common.generic_predictor.GenericPredictor at 0x7fc6c9b189d0>
What does the future look like?
[25]:
# Ask the fitted predictor for 48 steps (presumably the 48 steps that
# follow the end of the training data - confirm against GenericPredictor).
pred = model.predict(48)
[26]:
# Label each series for the legend, then draw the last 96 training points,
# the prediction and the first 48 test points (dashed) on one plot.
history = train[-96:].rename({"generation solar": "Training set"})
forecast = pred.rename({"generation solar": "Prediction"})
truth = test[:48].rename({"generation solar": "Truth"})
(
    on.Plot()
    .add(on.marks.line, history)
    .add(on.marks.line, forecast)
    .add(on.marks.line, truth, type="dashed")
    .properties(width=600, height=300)
    .show()
)
[26]:
Generic Detector#
[27]:
# Instantiate the GenericDetector from the common context.
# Note: this rebinds `model`, which previously held the predictor.
model = common.GenericDetector()
[28]:
# Fit the detector on the training split.
model.fit(train)
[28]:
<ontime.context.common.generic_detector.GenericDetector at 0x7fc6c9a32ef0>
Does the current signal have a problem?
[29]:
# Run the fitted detector over the whole test split.
detected_test = model.detect(test)
[30]:
# Show the first 72 test points as a line, with the detector output for the
# same window drawn on top as dots.
window = test[:72]
anomalies = detected_test[:72].rename({"generation solar": "Anomalies"})
(
    on.Plot(window)
    .add(on.marks.line)
    .add(on.marks.mark, data=anomalies, type="dot")
    .properties(width=600, height=300)
    .show()
)
[30]:
What if we want to get an idea of future problems?
[31]:
# Call predetect with a horizon of 72 (presumably: forecast 72 steps ahead
# and run detection on that forecast - confirm against GenericDetector).
predetected = model.predetect(72)
[32]:
# Show the first 72 test points as a line, with the pre-detected output for
# the same horizon drawn on top as dots.
window = test[:72]
anomalies = predetected[:72].rename({"generation solar": "Anomalies"})
(
    on.Plot(window)
    .add(on.marks.line)
    .add(on.marks.mark, data=anomalies, type="dot")
    .properties(width=600, height=300)
    .show()
)
[32]:
Data Quality Detector#
Detect values above an absolute threshold
[33]:
# Detector using an absolute upper threshold of 3000.
detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000
)
# Fitting on some data: the training split, so that the data used for
# fitting is distinct from the data being checked.
detector.fit(train)
# Detecting on other data: the first 72 points of the test split.
detector.detect(test[:72]).plot()
[33]:
Or between two thresholds
[34]:
# Detector using both an absolute lower (1000) and upper (3000) threshold.
detector = common.DataQualityDetector(
    threshold_type='threshold',
    upper_threshold=3000,
    lower_threshold=1000
)
# Fitting on some data: the training split, so that the data used for
# fitting is distinct from the data being checked.
detector.fit(train)
# Detecting on other data: the first 72 points of the test split.
detector.detect(test[:72]).plot()
[34]:
Or within a statistical range
[35]:
# Detector using a quantile-type threshold (upper_threshold=0.8; presumably
# the 0.8 quantile of the fitted data - confirm against DataQualityDetector).
detector = common.DataQualityDetector(
    threshold_type='quantile',
    upper_threshold=0.8
)
# Fitting on some data: the training split. Fitting on the test split would
# derive the statistical range from the very data being checked.
detector.fit(train)
# Detecting on other data: the first 72 points of the test split.
detector.detect(test[:72]).plot()
[35]:
Missing Data Detector#
Creating data with NaNs
[36]:
import numpy as np
def add_random_nans(series, n=1):
    """
    Randomly overwrite values of a pandas Series with NaN.

    The series is modified in place and also returned. Entries are picked by
    position rather than by index label, so exactly ``min(n, len(series))``
    values are replaced even when the index contains duplicate labels.

    Parameters:
    - series (pd.Series): The pandas Series to modify.
    - n (int): The number of NaN values to add. Default is 1. Values larger
      than the length of the series are capped to that length.

    Returns:
    - pd.Series: The same Series object, now containing the NaN values.
    """
    n = min(n, len(series))
    # Sample distinct positions, not labels: label-based assignment would
    # blank every row sharing a chosen label when the index has duplicates.
    nan_positions = np.random.choice(len(series), size=n, replace=False)
    series.iloc[nan_positions] = np.nan
    return series
[37]:
# Take the test split as a pandas Series, overwrite 300 randomly chosen
# values with NaN, and wrap the result back into an ontime TimeSeries.
ts_w_nans = on.TimeSeries.from_series(add_random_nans(test.pd_series(), 300))
Detecting the NaNs
[38]:
# Instantiate the MissingDataDetector from the common context (rebinds
# `detector`); it is used here without a prior call to fit.
detector = common.MissingDataDetector()
# Run detection on the first 72 points of the NaN-injected series and plot
# the result.
detector.detect(ts_w_nans[:72]).plot()
[38]: