Plots#
[1]:
# Make the package located in ../src importable from this notebook
import sys
sys.path.insert(0, '../src')
[2]:
import pandas as pd
import ontime as on
from darts.datasets import EnergyDataset
Load data#
[3]:
# Load the energy dataset shipped with darts.
ts = EnergyDataset().load()
Complete TimeSeries
[4]:
# Convert the series to a DataFrame, interpolate the missing values and keep
# only the three generation columns used throughout the notebook.
cols = ['generation biomass', 'generation solar', 'generation nuclear']
df = ts.pd_dataframe().interpolate()[cols]
[5]:
# Wrap the reduced DataFrame in an onTime TimeSeries (rebinds `ts`).
ts = on.TimeSeries.from_dataframe(df)
Prepare data
[6]:
# Univariate series: only the 'generation solar' component, cut to the window
# between pd.Timestamp('2015') and pd.Timestamp('2016').
ts_uni = ts['generation solar'].slice(pd.Timestamp('2015'), pd.Timestamp('2016'))
# Multivariate series: every component over the same window.
ts_multi = ts.slice(pd.Timestamp('2015'), pd.Timestamp('2016'))
Primitive Plots#
Line(s)#
With univariate TimeSeries
[7]:
# Line mark over the first 400 points of the univariate series.
(
    on.Plot(ts_uni.head(400))
    .add(on.marks.line)
    .properties(width=600)
    .show()
)
[7]:
With multivariate TimeSeries
[8]:
# Line mark over the first 400 points of the multivariate series.
(
    on.Plot(ts_multi.head(400))
    .add(on.marks.line)
    .properties(width=600)
    .show()
)
[8]:
Dots#
With univariate TimeSeries
[9]:
# Dots mark over the first 400 points of the univariate series.
(
    on.Plot(ts_uni.head(400))
    .add(on.marks.dots)
    .properties(width=600)
    .show()
)
[9]:
With multivariate TimeSeries
[10]:
# Dots mark over the first 400 points of the multivariate series.
(
    on.Plot(ts_multi.head(400))
    .add(on.marks.dots)
    .properties(width=600)
    .show()
)
[10]:
Areas#
With a single time series
[11]:
# Area mark over the first 400 points of the univariate series.
(
    on.Plot(ts_uni.head(400))
    .add(on.marks.area)
    .properties(width=600)
    .show()
)
[11]:
With a multivariate time series, the area mark works with exactly two components
[12]:
from darts import concatenate
[13]:
# First we build a two-component series from components 0 and 1 of ts_multi,
# then convert the darts result back into an onTime TimeSeries.
ts_ci = on.TimeSeries.from_darts(
    concatenate(
        [ts_multi.univariate_component(idx) for idx in range(2)],
        axis=1,
    )
)
[14]:
# Then we plot the first 200 points of the two-component series as an area.
(
    on.Plot(ts_ci.head(200))
    .add(on.marks.area, title='Diff. between solar and biomass generation')
    .properties(width=600, height=200)
    .show()
)
[14]:
Heatmaps#
With univariate TimeSeries
[15]:
# Heatmap mark over the first 1000 points of the univariate series.
(
    on.Plot(ts_uni.head(1000))
    .add(on.marks.heatmap)
    .properties(width=600, height=50)
    .show()
)
[15]:
With multivariate TimeSeries
[16]:
# Heatmap mark over the first 1000 points of the multivariate series.
(
    on.Plot(ts_multi.head(1000))
    .add(on.marks.heatmap)
    .properties(width=600, height=150)
    .show()
)
[16]:
Combined Plots#
Most of the plots in onTime can be combined, as they are based on Altair layered charts. For instance, you can do the following to overlay dots and a line in the same chart.
[17]:
# Start from an empty plot and give each mark its own data: dots for
# component 1 and a line for component 0, 400 points each.
(
    on.Plot()
    .add(on.marks.dots, ts_multi.univariate_component(1).head(400))
    .add(on.marks.line, ts_multi.univariate_component(0).head(400))
    .properties(width=600, height=200)
    .show()
)
[17]:
Thematic Plots#
Forecasts#
[18]:
# Split the univariate series at the 0.9 mark (presumably a proportion of the
# series -- confirm against split_before): the part before the split point is
# the training set, the rest is the test set.
ts_train, ts_test = ts_uni.split_before(0.9)
[19]:
from ontime.context import common
[20]:
# Instantiate the generic predictor from ontime.context.common and fit it on
# the training split.
model = common.GenericPredictor()
model.fit(ts_train)
[20]:
<ontime.context.common.generic_predictor.GenericPredictor at 0x7f95d3d5b010>
[21]:
# Forecast 24 * 3 = 72 steps ahead (three days, assuming hourly samples).
ts_pred = model.predict(24 * 3)
[22]:
# Rename the 'generation solar' component of each series so that the three
# series carry distinct names (presumably used as labels when plotted).
ts_train = ts_train.rename({'generation solar':'Training set'})
ts_test = ts_test.rename({'generation solar':'Test set'})
ts_pred = ts_pred.rename({'generation solar':'Forecast'})
Plot a prediction
[23]:
# Layer three lines on one chart: the first 24 * 3 points of the test set
# (dashed), the last 24 * 4 points of the training set, and the forecast.
(
on.Plot()
.add(on.marks.line, ts_test.head(24 * 3), type='dashed')
.add(on.marks.line, ts_train.tail(24 * 4))
.add(on.marks.line, ts_pred)
.properties(width=600, height=200)
.show()
)
[23]:
Anomalies#
Create the anomaly detectors
[24]:
# One detector per anomaly family: two quantile detectors (high quantiles 0.99
# and 0.98) and one threshold detector with a fixed low threshold of -30.
td_point = on.detectors.quantile(high_quantile=0.99)
td_collective = on.detectors.threshold(low_threshold=-30)
td_contextual = on.detectors.quantile(high_quantile=0.98)
Add anomalies
[25]:
import numpy as np
import random
def add_point_anomalies(ts, n, value):
    """Overwrite ``n`` randomly selected rows of ``ts`` with ``value``.

    Args:
        ts: Series exposing ``pd_dataframe()``.
        n: Number of distinct index labels to overwrite (drawn without
            replacement from the DataFrame index).
        value: Value written to every column of the selected rows.

    Returns:
        An ``on.TimeSeries`` built from the modified DataFrame.
    """
    frame = ts.pd_dataframe()
    # replace=False guarantees n distinct positions.
    picked_labels = np.random.choice(frame.index, size=n, replace=False)
    frame.loc[picked_labels] = value
    return on.TimeSeries.from_dataframe(frame)
def add_collective_anomalies(ts, n, min_duration=10, max_duration=20, value=-40):
    """Overwrite ``n`` randomly placed contiguous blocks of ``ts`` with ``value``.

    For each block a duration (in days) is drawn uniformly from
    ``[min_duration, max_duration]``; the block covers every row whose label
    lies between a random start timestamp and ``start + (duration - 1) days``
    (inclusive label slice). Blocks are placed independently and may overlap.

    Args:
        ts: Series exposing ``pd_dataframe()``; its index must support
            ``pd.Timedelta`` arithmetic.
        n: Number of blocks to insert.
        min_duration: Smallest block duration, in days (inclusive).
        max_duration: Largest block duration, in days (inclusive).
        value: Value written to every column inside a block.

    Returns:
        An ``on.TimeSeries`` built from the modified DataFrame.
    """
    df = ts.pd_dataframe()
    # Exactly n blocks: the previous range(n + 1) inserted one block too many.
    for _ in range(n):
        # Draw from numpy's RNG only, so np.random.seed() alone makes the
        # whole function reproducible.
        block_duration = int(np.random.randint(min_duration, max_duration + 1))
        span = pd.Timedelta(days=block_duration - 1)
        # Only start where the whole block fits. The margin is expressed in
        # time, matching how the block end is computed, not in sample count.
        candidates = df.index[df.index <= df.index[-1] - span]
        if len(candidates) == 0:
            # Series shorter than the block: start anywhere and let the label
            # slice stop at the end of the series.
            candidates = df.index
        start_index = np.random.choice(candidates)
        end_index = start_index + span
        df.loc[start_index:end_index] = value
    return on.TimeSeries.from_dataframe(df)
Select univariate component
[26]:
# Keep only component 0 of `ts` ('generation biomass', the first column
# selected when the DataFrame was reduced).
ts = ts.univariate_component(0)
[27]:
# Inject 10 point anomalies of value 30, then collective anomaly blocks
# (called with n=4 and the default block durations).
ts = add_point_anomalies(ts, 10, 30)
ts = add_collective_anomalies(ts, 4)
Create binary time series
[28]:
# Only the two quantile detectors are fitted on the series; the threshold
# detector is used without a fit call.
td_point.fit(ts)
td_contextual.fit(ts)
# Run every detector on the series to obtain one detection series each.
ts_ano_point = td_point.detect(ts)
ts_ano_collective = td_collective.detect(ts)
ts_ano_contextual = td_contextual.detect(ts)
[29]:
# Give each detection series a descriptive component name in place of the
# inherited 'generation biomass'. The first label previously read
# 'Ponctual anomalies' (misspelling).
ts_ano_point = ts_ano_point.rename({'generation biomass': 'Point anomalies'})
ts_ano_collective = ts_ano_collective.rename({'generation biomass': 'Collective anomalies'})
ts_ano_contextual = ts_ano_contextual.rename({'generation biomass': 'Contextual anomalies'})
Plot the time series with marked anomalies
[82]:
# Define the window to plot as positional offsets into the series
# (24 * 7 samples is one week, assuming hourly data).
start = 24 * 7 * 54  # offset of 9072 samples (54 "weeks")
duration = 24 * 7 * 15  # window length of 2520 samples (15 "weeks")
end = start + duration
[83]:
# Plot the selected window of the series as a line and overlay the three
# detection series on the same window, each with its own mark type
# ('highlight', 'background' and 'dot').
(
on.Plot(ts[start:end])
.add(on.marks.mark, data=ts_ano_contextual[start:end], type='highlight')
.add(on.marks.mark, data=ts_ano_collective[start:end], type='background')
.add(on.marks.line)
.add(on.marks.mark, data=ts_ano_point[start:end], type='dot')
.properties(width=800, height=200)
.show()
)
[83]:
Confidence Intervals#
[84]:
# Generate two random walks over the same 2022 date range, each from its own
# freshly created generator.
ts1, ts2 = (
    on.generators.random_walk().generate(
        start=pd.Timestamp('2022-01-01'),
        end=pd.Timestamp('2022-12-31'),
    )
    for _ in range(2)
)
[85]:
# Absolute values of both walks serve as the two bounds of the interval.
ts1_abs, ts2_abs = ts1.map(np.abs), ts2.map(np.abs)
# Stack them into a two-component onTime series and label the components.
ts_ci = on.TimeSeries.from_darts(
    concatenate([ts1_abs, ts2_abs], axis=1)
).rename({'random_walk': 'CI Upper bound', 'random_walk_1': 'CI Lower bound'})
# The hypothetical measurement is the mean of the two bounds.
ts_mid = ((ts1_abs + ts2_abs) / 2).rename({'random_walk': 'Measurement'})
[86]:
# Then we plot it: the two-component series is drawn as an area and the
# measurement as a line, both limited to their first 200 points.
(
on.Plot() # empty plot; each mark below is given its own data
.add(on.marks.area, ts_ci.head(200), title='Confidence interval')
.add(on.marks.line, ts_mid.head(200))
.properties(width=600, height=200)
.show()
)
[86]:
[ ]: