Proof of Concept: Inverse Transforms for Forecasters
[1]:
import matplotlib.pyplot as plt
from merlion.utils import TimeSeries
from ts_datasets.forecast import M4
# Take the item at index 2 of the M4 "Hourly" subset; `md["trainval"]` is used
# below as a row mask separating the train/validation rows from the test rows.
ts, md = M4(subset="Hourly")[2]
train, test = (
    TimeSeries.from_pd(ts[mask]) for mask in (md["trainval"], ~md["trainval"])
)

# Plot the whole series and mark the last training timestamp with a dashed line.
ax = plt.figure(figsize=(10, 6)).add_subplot(1, 1, 1)
ax.plot(ts)
ax.axvline(x=train.to_pd().index[-1], linestyle="--", color="k")
plt.show()
100%|██████████| 414/414 [00:00<00:00, 861.84it/s]
[2]:
import matplotlib.pyplot as plt
import pandas as pd
from merlion.evaluate.forecast import ForecastMetric
from merlion.models.forecast.base import ForecasterBase
from merlion.models.forecast.prophet import Prophet, ProphetConfig
from merlion.transform.resample import TemporalResample
from merlion.transform.sequence import TransformSequence
from merlion.utils import TimeSeries
def get_model(transform=None):
    """
    Build a Prophet forecaster, optionally with a data transform.

    :param transform: transform to chain after a default ``TemporalResample()``.
        If ``None``, ``transform=None`` is passed to ``ProphetConfig`` unchanged.
    :return: a ``Prophet`` model created with ``add_seasonality="auto"``; it is
        not trained here.
    """
    pipeline = (
        None
        if transform is None
        else TransformSequence([TemporalResample(), transform])
    )
    return Prophet(ProphetConfig(add_seasonality="auto", transform=pipeline))
def eval_model(model: ForecasterBase, train_data: TimeSeries, test_data: TimeSeries,
               apply_inverse=True):
    """
    Train ``model``, forecast over the test time stamps, then report and plot the fit.

    Prints the train and test sMAPE and shows a figure of ground truth vs. model
    output, with a dashed vertical line at the first test time stamp.

    :param model: forecaster to train. Its ``config.invert_transform`` is
        overwritten with ``apply_inverse`` before training.
    :param train_data: series the model is trained on.
    :param test_data: held-out series whose time stamps are forecast.
    :param apply_inverse: value assigned to ``model.config.invert_transform``.
        When ``False``, the ground truth is passed through ``model.transform``
        before scoring and plotting (presumably because the model output is then
        left in the transformed space -- confirm against the Merlion docs).
    :return: the forecast for the test time stamps.
    """
    model.config.invert_transform = apply_inverse
    train_pred, _ = model.train(train_data)

    # Ground truth to compare against; replaced by its transformed version below
    # whenever the model's transform is not being inverted.
    train_truth = train_data if apply_inverse else model.transform(train_data)

    horizon = test_data.time_stamps
    test_pred, _ = model.forecast(horizon)

    if apply_inverse:
        test_truth = test_data
    else:
        # Transform train + test as one series, then keep only the test time stamps.
        transformed_all = model.transform(train_data + test_data)
        test_truth = transformed_all.align(reference=horizon)

    train_smape = ForecastMetric.sMAPE.value(train_truth, train_pred)
    print(f"Train sMAPE: {train_smape:.2f}")
    test_smape = ForecastMetric.sMAPE.value(test_truth, test_pred)
    print(f"Test sMAPE: {test_smape:.2f}")

    fig, ax = plt.subplots(figsize=(10, 6))
    truth_df = (train_truth + test_truth).to_pd()
    ax.plot(truth_df, label="true")
    pred_df = (train_pred + test_pred).to_pd()
    ax.plot(pred_df, label="model")
    # Time stamps are interpreted as seconds when converting to a datetime.
    forecast_start = pd.to_datetime(horizon[0], unit="s")
    ax.axvline(forecast_start, color="k", linestyle="--")
    ax.legend()
    plt.show()
    return test_pred
[3]:
# Baseline: Prophet built without an extra transform; keep the test forecast
# in `base` for the final comparison plot.
print("No transform...")
base = eval_model(
    model=get_model(transform=None),
    train_data=train,
    test_data=test,
    apply_inverse=True,
)
21:14:26 - cmdstanpy - INFO - Chain [1] start processing
21:14:26 - cmdstanpy - INFO - Chain [1] done processing
No transform...
Train sMAPE: 4.88
Test sMAPE: 17.58
[4]:
from merlion.transform.normalize import MeanVarNormalize, MinMaxNormalize
# MeanVarNormalize: first run without inverting the transform (forecast discarded),
# then with inversion; the inverted test forecast is kept in `norm`.
print("Normalize...")
eval_model(
    model=get_model(transform=MeanVarNormalize()),
    train_data=train,
    test_data=test,
    apply_inverse=False,
)
print("Normalize + invert...")
norm = eval_model(
    model=get_model(transform=MeanVarNormalize()),
    train_data=train,
    test_data=test,
    apply_inverse=True,
)
Normalize...
21:14:26 - cmdstanpy - INFO - Chain [1] start processing
21:14:26 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 54.41
Test sMAPE: 118.27
Normalize + invert...
21:14:27 - cmdstanpy - INFO - Chain [1] start processing
21:14:27 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 5.73
Test sMAPE: 17.55
[5]:
from merlion.transform.normalize import BoxCoxTransform
# BoxCoxTransform: first run without inverting the transform (forecast discarded),
# then with inversion; the inverted test forecast is kept in `boxcox`.
print("Box-Cox transform...")
eval_model(
    model=get_model(transform=BoxCoxTransform()),
    train_data=train,
    test_data=test,
    apply_inverse=False,
)
print("Box-Cox transform + invert...")
boxcox = eval_model(
    model=get_model(transform=BoxCoxTransform()),
    train_data=train,
    test_data=test,
    apply_inverse=True,
)
Box-Cox transform...
21:14:27 - cmdstanpy - INFO - Chain [1] start processing
21:14:27 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 0.99
Test sMAPE: 3.36
Box-Cox transform + invert...
21:14:28 - cmdstanpy - INFO - Chain [1] start processing
21:14:28 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 3.61
Test sMAPE: 12.30
[6]:
from merlion.transform.moving_average import MovingAverage
# MovingAverage(n_steps=5): first run without inverting the transform (forecast
# discarded), then with inversion; the inverted test forecast is kept in `ma`.
print("Moving Average...")
eval_model(
    model=get_model(transform=MovingAverage(n_steps=5)),
    train_data=train,
    test_data=test,
    apply_inverse=False,
)
print("Moving Average + invert...")
ma = eval_model(
    model=get_model(transform=MovingAverage(n_steps=5)),
    train_data=train,
    test_data=test,
    apply_inverse=True,
)
Moving Average...
21:14:29 - cmdstanpy - INFO - Chain [1] start processing
21:14:29 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 4.46
Test sMAPE: 17.09
Moving Average + invert...
21:14:29 - cmdstanpy - INFO - Chain [1] start processing
21:14:29 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 5.49
Test sMAPE: 17.88
[7]:
from merlion.transform.moving_average import DifferenceTransform
# DifferenceTransform: first run without inverting the transform (forecast discarded),
# then with inversion; the inverted test forecast is kept in `diff`.
print("Difference transform...")
eval_model(
    model=get_model(transform=DifferenceTransform()),
    train_data=train,
    test_data=test,
    apply_inverse=False,
)
print("Difference transform + invert...")
diff = eval_model(
    model=get_model(transform=DifferenceTransform()),
    train_data=train,
    test_data=test,
    apply_inverse=True,
)
Difference transform...
21:14:30 - cmdstanpy - INFO - Chain [1] start processing
21:14:30 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 53.17
Test sMAPE: 48.12
Difference transform + invert...
21:14:30 - cmdstanpy - INFO - Chain [1] start processing
21:14:30 - cmdstanpy - INFO - Chain [1] done processing
Train sMAPE: 6.47
Test sMAPE: 19.18
[8]:
# Overlay the test forecasts returned by the apply_inverse=True runs on the true
# test series, drawing them in order of increasing sMAPE.
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
ax.plot(test.to_pd(), label="true")
series = [("original", base), ("norm", norm), ("box-cox", boxcox), ("ma", ma), ("diff", diff)]
# Iterate with `forecast` rather than `ts`: a top-level `for ..., ts in ...` loop
# would rebind the notebook-level `ts` that the first cell loaded.
smapes = {name: ForecastMetric.sMAPE.value(test, forecast) for name, forecast in series}
# A forecast is drawn when its sMAPE is at most 50 or it is one of the two best.
# The cutoff does not change between iterations, so compute it once up front.
smape_cutoff = max(50, sorted(smapes.values())[:2][-1])
for name, forecast in sorted(series, key=lambda ns: smapes[ns[0]]):
    smape = smapes[name]
    if smape <= smape_cutoff:
        ax.plot(forecast.to_pd(), label=f"{name} (sMAPE={smape:.1f})")
ax.legend()
plt.show()