diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 0f8934c57..44157ceb9 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -40,4 +40,4 @@ jobs:
uv pip install --system "numpy<2" ".[dev]"
- name: Tests
- run: nbdev_test --do_print --timing --n_workers 0 --flags polars
+ run: nbdev_test --do_print --timing --n_workers 0 --flags polars
\ No newline at end of file
diff --git a/action_files/test_models/src/evaluation.py b/action_files/test_models/src/evaluation.py
index e93d0d9e9..cda6e059b 100644
--- a/action_files/test_models/src/evaluation.py
+++ b/action_files/test_models/src/evaluation.py
@@ -41,9 +41,12 @@ def evaluate(model: str, dataset: str, group: str):
if __name__ == '__main__':
groups = ['Monthly']
- models = ['AutoDilatedRNN', 'RNN', 'TCN', 'DeepAR',
+ models = ['AutoDilatedRNN', 'RNN',
+ 'TCN',
+ 'DeepAR',
'NHITS', 'TFT', 'AutoMLP', 'DLinear', 'VanillaTransformer',
- 'BiTCN', 'TiDE', 'DeepNPTS', 'NBEATS', 'KAN']
+ 'BiTCN', 'TiDE', 'DeepNPTS', 'NBEATS', 'KAN'
+ ]
datasets = ['M3']
evaluation = [evaluate(model, dataset, group) for model, group in product(models, groups) for dataset in datasets]
evaluation = [eval_ for eval_ in evaluation if eval_ is not None]
diff --git a/action_files/test_models/src/models.py b/action_files/test_models/src/models.py
index ec32b5a82..96a1a0a3d 100644
--- a/action_files/test_models/src/models.py
+++ b/action_files/test_models/src/models.py
@@ -61,21 +61,22 @@ def main(dataset: str = 'M3', group: str = 'Monthly') -> None:
"random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
}
config_drnn = {'input_size': tune.choice([2 * horizon]),
- 'encoder_hidden_size': tune.choice([124]),
+ 'encoder_hidden_size': tune.choice([16]),
"max_steps": 300,
"val_check_steps": 100,
- "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),}
+ "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
+ "scaler_type": "minmax1"}
models = [
AutoDilatedRNN(h=horizon, loss=MAE(), config=config_drnn, num_samples=2, cpus=1),
- RNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
- TCN(h=horizon, input_size=2 * horizon, encoder_hidden_size=20, max_steps=300),
+ RNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
+ TCN(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
NHITS(h=horizon, input_size=2 * horizon, dropout_prob_theta=0.5, loss=MAE(), max_steps=1000, val_check_steps=500),
AutoMLP(h=horizon, loss=MAE(), config=config, num_samples=2, cpus=1),
DLinear(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=2000, val_check_steps=500),
TFT(h=horizon, input_size=2 * horizon, loss=SMAPE(), hidden_size=64, scaler_type='robust', windows_batch_size=512, max_steps=1500, val_check_steps=500),
VanillaTransformer(h=horizon, input_size=2 * horizon, loss=MAE(), hidden_size=64, scaler_type='minmax1', windows_batch_size=512, max_steps=1500, val_check_steps=500),
- DeepAR(h=horizon, input_size=2 * horizon, scaler_type='minmax1', max_steps=1000),
+ DeepAR(h=horizon, input_size=2 * horizon, scaler_type='minmax1', max_steps=500),
BiTCN(h=horizon, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
TiDE(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000, val_check_steps=500),
DeepNPTS(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000, val_check_steps=500),
diff --git a/action_files/test_models/src/models2.py b/action_files/test_models/src/models2.py
index b309003fb..fe1fbfb6e 100644
--- a/action_files/test_models/src/models2.py
+++ b/action_files/test_models/src/models2.py
@@ -2,35 +2,39 @@
import time
import fire
-import numpy as np
+# import numpy as np
import pandas as pd
-import pytorch_lightning as pl
-import torch
+# import pytorch_lightning as pl
+# import torch
-import neuralforecast
+# import neuralforecast
from neuralforecast.core import NeuralForecast
from neuralforecast.models.gru import GRU
-from neuralforecast.models.rnn import RNN
-from neuralforecast.models.tcn import TCN
+# from neuralforecast.models.rnn import RNN
+# from neuralforecast.models.tcn import TCN
from neuralforecast.models.lstm import LSTM
from neuralforecast.models.dilated_rnn import DilatedRNN
-from neuralforecast.models.deepar import DeepAR
-from neuralforecast.models.mlp import MLP
-from neuralforecast.models.nhits import NHITS
-from neuralforecast.models.nbeats import NBEATS
+# from neuralforecast.models.deepar import DeepAR
+# from neuralforecast.models.mlp import MLP
+# from neuralforecast.models.nhits import NHITS
+# from neuralforecast.models.nbeats import NBEATS
from neuralforecast.models.nbeatsx import NBEATSx
-from neuralforecast.models.tft import TFT
-from neuralforecast.models.vanillatransformer import VanillaTransformer
-from neuralforecast.models.informer import Informer
-from neuralforecast.models.autoformer import Autoformer
-from neuralforecast.models.patchtst import PatchTST
+# from neuralforecast.models.tft import TFT
+# from neuralforecast.models.vanillatransformer import VanillaTransformer
+# from neuralforecast.models.informer import Informer
+# from neuralforecast.models.autoformer import Autoformer
+# from neuralforecast.models.patchtst import PatchTST
from neuralforecast.auto import (
- AutoMLP, AutoNHITS, AutoNBEATS, AutoDilatedRNN, AutoTFT
+ # AutoMLP,
+ AutoNHITS,
+ AutoNBEATS,
+ # AutoDilatedRNN,
+ # AutoTFT
)
-from neuralforecast.losses.pytorch import SMAPE, MAE
+from neuralforecast.losses.pytorch import MAE
from ray import tune
from src.data import get_data
@@ -49,32 +53,18 @@ def main(dataset: str = 'M3', group: str = 'Monthly') -> None:
"scaler_type": "minmax1",
"random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
}
- config = {
- "hidden_size": tune.choice([256, 512]),
- "num_layers": tune.choice([2, 4]),
- "input_size": tune.choice([2 * horizon]),
- "max_steps": 1000,
- "val_check_steps": 300,
- "scaler_type": "minmax1",
- "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
- }
- config_drnn = {'input_size': tune.choice([2 * horizon]),
- 'encoder_hidden_size': tune.choice([124]),
- "max_steps": 300,
- "val_check_steps": 100,
- "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),}
models = [
- LSTM(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
- DilatedRNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
- GRU(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
+ LSTM(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
+ DilatedRNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
+ GRU(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
AutoNBEATS(h=horizon, loss=MAE(), config=config_nbeats, num_samples=2, cpus=1),
AutoNHITS(h=horizon, loss=MAE(), config=config_nbeats, num_samples=2, cpus=1),
NBEATSx(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000),
- PatchTST(h=horizon, input_size=2 * horizon, patch_len=4, stride=4, loss=MAE(), scaler_type='minmax1', windows_batch_size=512, max_steps=1000, val_check_steps=500),
+ # PatchTST(h=horizon, input_size=2 * horizon, patch_len=4, stride=4, loss=MAE(), scaler_type='minmax1', windows_batch_size=512, max_steps=1000, val_check_steps=500),
]
# Models
- for model in models[:-1]:
+ for model in models:
model_name = type(model).__name__
print(50*'-', model_name, 50*'-')
start = time.time()
diff --git a/action_files/test_models/src/multivariate_models.py b/action_files/test_models/src/multivariate_models.py
index 1b1d9593b..8b1577a57 100644
--- a/action_files/test_models/src/multivariate_models.py
+++ b/action_files/test_models/src/multivariate_models.py
@@ -10,7 +10,7 @@
from neuralforecast.models.tsmixer import TSMixer
from neuralforecast.models.tsmixerx import TSMixerx
from neuralforecast.models.itransformer import iTransformer
-# from neuralforecast.models.stemgnn import StemGNN
+# # from neuralforecast.models.stemgnn import StemGNN
from neuralforecast.models.mlpmultivariate import MLPMultivariate
from neuralforecast.models.timemixer import TimeMixer
@@ -26,13 +26,13 @@ def main(dataset: str = 'multivariate', group: str = 'ETTm2') -> None:
train['ds'] = pd.to_datetime(train['ds'])
models = [
- SOFTS(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- TSMixer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- TSMixerx(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- iTransformer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- # StemGNN(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout_rate=0.0, max_steps=1000, val_check_steps=500),
- MLPMultivariate(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), max_steps=1000, val_check_steps=500),
- TimeMixer(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500)
+ SOFTS(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=500, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ TSMixer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ TSMixerx(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ iTransformer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=500, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ # StemGNN(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout_rate=0.0, max_steps=1000, val_check_steps=500, windows_batch_size=64, inference_windows_batch_size=64),
+ MLPMultivariate(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), max_steps=1000, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ TimeMixer(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=500, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64)
]
# Models
diff --git a/nbs/common.base_auto.ipynb b/nbs/common.base_auto.ipynb
index e120c2f33..16db978b4 100644
--- a/nbs/common.base_auto.ipynb
+++ b/nbs/common.base_auto.ipynb
@@ -238,7 +238,11 @@
" self.callbacks = callbacks\n",
"\n",
" # Base Class attributes\n",
- " self.SAMPLING_TYPE = cls_model.SAMPLING_TYPE\n",
+ " self.EXOGENOUS_FUTR = cls_model.EXOGENOUS_FUTR\n",
+ " self.EXOGENOUS_HIST = cls_model.EXOGENOUS_HIST\n",
+ " self.EXOGENOUS_STAT = cls_model.EXOGENOUS_STAT\n",
+ " self.MULTIVARIATE = cls_model.MULTIVARIATE \n",
+ " self.RECURRENT = cls_model.RECURRENT \n",
"\n",
" def __repr__(self):\n",
" return type(self).__name__ if self.alias is None else self.alias\n",
diff --git a/nbs/common.base_model.ipynb b/nbs/common.base_model.ipynb
index 2ae169f8f..fae60e40c 100644
--- a/nbs/common.base_model.ipynb
+++ b/nbs/common.base_model.ipynb
@@ -36,19 +36,25 @@
"from contextlib import contextmanager\n",
"from copy import deepcopy\n",
"from dataclasses import dataclass\n",
+ "from typing import List, Dict, Union\n",
"\n",
"import fsspec\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
+ "import torch.nn.functional as F\n",
"import pytorch_lightning as pl\n",
+ "import neuralforecast.losses.pytorch as losses\n",
+ "\n",
+ "from neuralforecast.losses.pytorch import BasePointLoss, DistributionLoss\n",
"from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n",
"from neuralforecast.tsdataset import (\n",
" TimeSeriesDataModule,\n",
" BaseTimeSeriesDataset,\n",
" _DistributedTimeSeriesDataModule,\n",
")\n",
- "from neuralforecast.losses.pytorch import IQLoss"
+ "from neuralforecast.common._scalers import TemporalNorm\n",
+ "from neuralforecast.utils import get_indexer_raise_missing"
]
},
{
@@ -112,27 +118,92 @@
"source": [
"#| export\n",
"class BaseModel(pl.LightningModule):\n",
- " EXOGENOUS_FUTR = True\n",
- " EXOGENOUS_HIST = True\n",
- " EXOGENOUS_STAT = True\n",
+ " EXOGENOUS_FUTR = True # If the model can handle future exogenous variables\n",
+ " EXOGENOUS_HIST = True # If the model can handle historical exogenous variables\n",
+ " EXOGENOUS_STAT = True # If the model can handle static exogenous variables\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(\n",
" self,\n",
- " random_seed,\n",
- " loss,\n",
- " valid_loss,\n",
- " optimizer,\n",
- " optimizer_kwargs,\n",
- " lr_scheduler,\n",
- " lr_scheduler_kwargs,\n",
- " futr_exog_list,\n",
- " hist_exog_list,\n",
- " stat_exog_list,\n",
- " max_steps,\n",
- " early_stop_patience_steps,\n",
+ " h: int,\n",
+ " input_size: int,\n",
+ " loss: Union[BasePointLoss, DistributionLoss, nn.Module],\n",
+ " valid_loss: Union[BasePointLoss, DistributionLoss, nn.Module],\n",
+ " learning_rate: float,\n",
+ " max_steps: int,\n",
+ " val_check_steps: int,\n",
+ " batch_size: int,\n",
+ " valid_batch_size: Union[int, None],\n",
+ " windows_batch_size: int,\n",
+ " inference_windows_batch_size: Union[int, None],\n",
+ " start_padding_enabled: bool,\n",
+ " n_series: Union[int, None] = None,\n",
+ " n_samples: Union[int, None] = 100,\n",
+ " h_train: int = 1,\n",
+ " inference_input_size: Union[int, None] = None,\n",
+ " step_size: int = 1,\n",
+ " num_lr_decays: int = 0,\n",
+ " early_stop_patience_steps: int = -1,\n",
+ " scaler_type: str = 'identity',\n",
+ " futr_exog_list: Union[List, None] = None,\n",
+ " hist_exog_list: Union[List, None] = None,\n",
+ " stat_exog_list: Union[List, None] = None,\n",
+ " exclude_insample_y: Union[bool, None] = False,\n",
+ " num_workers_loader: Union[int, None] = 0,\n",
+ " drop_last_loader: Union[bool, None] = False,\n",
+ " random_seed: Union[int, None] = 1,\n",
+ " alias: Union[str, None] = None,\n",
+ " optimizer: Union[torch.optim.Optimizer, None] = None,\n",
+ " optimizer_kwargs: Union[Dict, None] = None,\n",
+ " lr_scheduler: Union[torch.optim.lr_scheduler.LRScheduler, None] = None,\n",
+ " lr_scheduler_kwargs: Union[Dict, None] = None,\n",
+ " dataloader_kwargs=None,\n",
" **trainer_kwargs,\n",
" ):\n",
" super().__init__()\n",
+ "\n",
+ " # Multivariate checks\n",
+ " if self.MULTIVARIATE and n_series is None:\n",
+ " raise Exception(f'{type(self).__name__} is a multivariate model. Please set n_series to the number of unique time series in your dataset.')\n",
+ " if not self.MULTIVARIATE:\n",
+ " if n_series is not None:\n",
+ " warnings.warn(\n",
+ " f'{type(self).__name__} is a univariate model. Parameter n_series is ignored.'\n",
+ " )\n",
+ " n_series = 1\n",
+ " self.n_series = n_series \n",
+ "\n",
+ " # Protections for previous recurrent models\n",
+ " if input_size < 1:\n",
+ " input_size = 3 * h\n",
+ " warnings.warn(\n",
+ " f'Input size too small. Automatically setting input size to 3 * horizon = {input_size}'\n",
+ " )\n",
+ "\n",
+ " if inference_input_size is None:\n",
+ " inference_input_size = input_size \n",
+ " elif inference_input_size is not None and inference_input_size < 1:\n",
+ " inference_input_size = input_size\n",
+ " warnings.warn(\n",
+ " f'Inference input size too small. Automatically setting inference input size to input_size = {input_size}'\n",
+ " )\n",
+ "\n",
+ " # For recurrent models we need one additional input as we need to shift insample_y to use it as input\n",
+ " if self.RECURRENT:\n",
+ " input_size += 1\n",
+ " inference_input_size += 1\n",
+ "\n",
+ " # Attributes needed for recurrent models\n",
+ " self.horizon_backup = h\n",
+ " self.input_size_backup = input_size\n",
+ " self.n_samples = n_samples\n",
+ " if self.RECURRENT:\n",
+ " self.h_train = h_train\n",
+ " self.inference_input_size = inference_input_size\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = False\n",
+ " \n",
" with warnings.catch_warnings(record=False):\n",
" warnings.filterwarnings('ignore')\n",
" # the following line issues a warning about the loss attribute being saved\n",
@@ -147,8 +218,8 @@
" self.valid_loss = loss\n",
" else:\n",
" self.valid_loss = valid_loss\n",
- " self.train_trajectories = []\n",
- " self.valid_trajectories = []\n",
+ " self.train_trajectories: List = []\n",
+ " self.valid_trajectories: List = []\n",
"\n",
" # Optimization\n",
" if optimizer is not None and not issubclass(optimizer, torch.optim.Optimizer):\n",
@@ -162,7 +233,6 @@
" self.lr_scheduler = lr_scheduler\n",
" self.lr_scheduler_kwargs = lr_scheduler_kwargs if lr_scheduler_kwargs is not None else {}\n",
"\n",
- "\n",
" # Variables\n",
" self.futr_exog_list = list(futr_exog_list) if futr_exog_list is not None else []\n",
" self.hist_exog_list = list(hist_exog_list) if hist_exog_list is not None else []\n",
@@ -181,12 +251,28 @@
" if not self.EXOGENOUS_STAT and self.stat_exog_size > 0:\n",
" raise Exception(f'{type(self).__name__} does not support static exogenous variables.')\n",
"\n",
- " # Implicit Quantile Loss\n",
- " if isinstance(self.loss, IQLoss):\n",
- " if not isinstance(self.valid_loss, IQLoss):\n",
- " raise Exception('Please set valid_loss to IQLoss() when training with IQLoss')\n",
- " if isinstance(self.valid_loss, IQLoss) and not isinstance(self.loss, IQLoss):\n",
- " raise Exception('Please set loss to IQLoss() when validating with IQLoss') \n",
+ " # Protections for loss functions\n",
+ " if isinstance(self.loss, (losses.IQLoss, losses.MQLoss, losses.HuberMQLoss)):\n",
+ " loss_type = type(self.loss)\n",
+ " if not isinstance(self.valid_loss, loss_type):\n",
+ " raise Exception(f'Please set valid_loss={type(self.loss).__name__}() when training with {type(self.loss).__name__}')\n",
+ " if isinstance(self.valid_loss, losses.IQLoss):\n",
+ " valid_loss_type = type(self.valid_loss)\n",
+ " if not isinstance(self.loss, valid_loss_type):\n",
+ " raise Exception(f'Please set loss={type(self.valid_loss).__name__}() when validating with {type(self.valid_loss).__name__}') \n",
+ "\n",
+ " # Deny impossible loss / valid_loss combinations\n",
+ " if isinstance(self.loss, losses.BasePointLoss) and self.valid_loss.is_distribution_output:\n",
+ " raise Exception(f'Validation with distribution loss {type(self.valid_loss).__name__} is not possible when using loss={type(self.loss).__name__}. Please use a point valid_loss (MAE, MSE, ...)')\n",
+ " elif self.valid_loss.is_distribution_output and self.valid_loss is not loss:\n",
+ " # TODO: consider raising a warning or an exception here; for now, silently fall back to the training loss.\n",
+ " self.valid_loss = loss\n",
+ " \n",
+ " if isinstance(self.loss, (losses.relMSE, losses.Accuracy, losses.sCRPS)):\n",
+ " raise Exception(f\"{type(self.loss).__name__} cannot be used for training. Please use another loss function (MAE, MSE, ...)\")\n",
+ " \n",
+ " if isinstance(self.valid_loss, (losses.relMSE)):\n",
+ " raise Exception(f\"{type(self.valid_loss).__name__} cannot be used for validation. Please use another valid_loss (MAE, MSE, ...)\")\n",
"\n",
" ## Trainer arguments ##\n",
" # Max steps, validation steps and check_val_every_n_epoch\n",
@@ -217,7 +303,73 @@
" if trainer_kwargs.get('enable_checkpointing', None) is None:\n",
" trainer_kwargs['enable_checkpointing'] = False\n",
"\n",
+ " # Set other attributes\n",
" self.trainer_kwargs = trainer_kwargs\n",
+ " self.h = h\n",
+ " self.input_size = input_size\n",
+ " self.windows_batch_size = windows_batch_size\n",
+ " self.start_padding_enabled = start_padding_enabled\n",
+ "\n",
+ " # Padder to complete train windows, \n",
+ " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
+ " if start_padding_enabled:\n",
+ " self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0.0)\n",
+ " else:\n",
+ " self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
+ "\n",
+ " # Batch sizes\n",
+ " if self.MULTIVARIATE and n_series is not None:\n",
+ " self.batch_size = max(batch_size, n_series)\n",
+ " else:\n",
+ " self.batch_size = batch_size\n",
+ " if valid_batch_size is None:\n",
+ " self.valid_batch_size = batch_size\n",
+ " else:\n",
+ " self.valid_batch_size = valid_batch_size\n",
+ " if inference_windows_batch_size is None:\n",
+ " self.inference_windows_batch_size = windows_batch_size\n",
+ " else:\n",
+ " self.inference_windows_batch_size = inference_windows_batch_size\n",
+ "\n",
+ " # Optimization \n",
+ " self.learning_rate = learning_rate\n",
+ " self.max_steps = max_steps\n",
+ " self.num_lr_decays = num_lr_decays\n",
+ " self.lr_decay_steps = (\n",
+ " max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
+ " )\n",
+ " self.early_stop_patience_steps = early_stop_patience_steps\n",
+ " self.val_check_steps = val_check_steps\n",
+ " self.windows_batch_size = windows_batch_size\n",
+ " self.step_size = step_size\n",
+ " \n",
+ " # If the model does not support exogenous, it can't support exclude_insample_y\n",
+ " if exclude_insample_y and not (self.EXOGENOUS_FUTR or self.EXOGENOUS_HIST or self.EXOGENOUS_STAT):\n",
+ " raise Exception(f'{type(self).__name__} does not support `exclude_insample_y=True`. Please set `exclude_insample_y=False`')\n",
+ "\n",
+ " self.exclude_insample_y = exclude_insample_y\n",
+ "\n",
+ " # Scaler\n",
+ " self.scaler = TemporalNorm(\n",
+ " scaler_type=scaler_type,\n",
+ " dim=1, # Time dimension is 1.\n",
+ " num_features= 1 + len(self.hist_exog_list) + len(self.futr_exog_list)\n",
+ " )\n",
+ "\n",
+ " # Fit arguments\n",
+ " self.val_size = 0\n",
+ " self.test_size = 0\n",
+ "\n",
+ " # Model state\n",
+ " self.decompose_forecast = False\n",
+ "\n",
+ " # DataModule arguments\n",
+ " self.num_workers_loader = num_workers_loader\n",
+ " self.dataloader_kwargs = dataloader_kwargs\n",
+ " self.drop_last_loader = drop_last_loader\n",
+ " # used by on_validation_epoch_end hook\n",
+ " self.validation_step_outputs: List = []\n",
+ " self.alias = alias\n",
"\n",
" def __repr__(self):\n",
" return type(self).__name__ if self.alias is None else self.alias\n",
@@ -246,21 +398,11 @@
" set(temporal_cols.tolist()) & set(self.hist_exog_list + self.futr_exog_list)\n",
" )\n",
" \n",
- " def _set_quantile_for_iqloss(self, **data_module_kwargs):\n",
- " if \"quantile\" in data_module_kwargs:\n",
- " if not isinstance(self.loss, IQLoss):\n",
- " raise Exception(\n",
- " \"Please train with loss=IQLoss() to make use of the quantile argument.\"\n",
- " )\n",
- " else:\n",
- " self.quantile = data_module_kwargs[\"quantile\"]\n",
- " data_module_kwargs.pop(\"quantile\")\n",
- " self.loss.update_quantile(q=self.quantile)\n",
- " elif isinstance(self.loss, IQLoss):\n",
- " self.quantile = 0.5\n",
- " self.loss.update_quantile(q=self.quantile)\n",
- "\n",
- " return data_module_kwargs\n",
+ " def _set_quantiles(self, quantiles=None):\n",
+ " if quantiles is None and isinstance(self.loss, losses.IQLoss):\n",
+ " self.loss.update_quantile(q=[0.5])\n",
+ " elif hasattr(self.loss, 'update_quantile') and callable(self.loss.update_quantile):\n",
+ " self.loss.update_quantile(q=quantiles)\n",
"\n",
" def _fit_distributed(\n",
" self,\n",
@@ -480,7 +622,792 @@
" model.load_state_dict(content[\"state_dict\"], strict=True, assign=True)\n",
" else: # pytorch<2.1\n",
" model.load_state_dict(content[\"state_dict\"], strict=True)\n",
- " return model"
+ " return model\n",
+ "\n",
+ " def _create_windows(self, batch, step, w_idxs=None):\n",
+ " # Parse common data\n",
+ " window_size = self.input_size + self.h\n",
+ " temporal_cols = batch['temporal_cols']\n",
+ " temporal = batch['temporal'] \n",
+ "\n",
+ " if step == 'train':\n",
+ " if self.val_size + self.test_size > 0:\n",
+ " cutoff = -self.val_size - self.test_size\n",
+ " temporal = temporal[:, :, :cutoff]\n",
+ "\n",
+ " temporal = self.padder_train(temporal)\n",
+ " \n",
+ " if temporal.shape[-1] < window_size:\n",
+ " raise Exception('Time series is too short for training, consider setting a smaller input size or set start_padding_enabled=True')\n",
+ " \n",
+ " windows = temporal.unfold(dimension=-1, \n",
+ " size=window_size, \n",
+ " step=self.step_size)\n",
+ "\n",
+ " if self.MULTIVARIATE:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws, L + h, C, n_series]\n",
+ " windows = windows.permute(2, 3, 1, 0)\n",
+ " else:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws * n_series, L + h, C, 1]\n",
+ " windows_per_serie = windows.shape[2]\n",
+ " windows = windows.permute(0, 2, 3, 1)\n",
+ " windows = windows.flatten(0, 1)\n",
+ " windows = windows.unsqueeze(-1)\n",
+ "\n",
+ " # Sample and Available conditions\n",
+ " available_idx = temporal_cols.get_loc('available_mask') \n",
+ " available_condition = windows[:, :self.input_size, available_idx]\n",
+ " available_condition = torch.sum(available_condition, axis=(1, -1)) # Sum over time & series dimension\n",
+ " final_condition = (available_condition > 0)\n",
+ " \n",
+ " if self.h > 0:\n",
+ " sample_condition = windows[:, self.input_size:, available_idx]\n",
+ " sample_condition = torch.sum(sample_condition, axis=(1, -1)) # Sum over time & series dimension\n",
+ " final_condition = (sample_condition > 0) & (available_condition > 0)\n",
+ " \n",
+ " windows = windows[final_condition]\n",
+ " \n",
+ " # Parse Static data to match windows\n",
+ " static = batch.get('static', None)\n",
+ " static_cols=batch.get('static_cols', None)\n",
+ "\n",
+ " # Repeat static if univariate: [n_series, S] -> [Ws * n_series, S]\n",
+ " if static is not None and not self.MULTIVARIATE:\n",
+ " static = torch.repeat_interleave(static, \n",
+ " repeats=windows_per_serie, dim=0)\n",
+ " static = static[final_condition] \n",
+ "\n",
+ " # Protection of empty windows\n",
+ " if final_condition.sum() == 0:\n",
+ " raise Exception('No windows available for training')\n",
+ "\n",
+ " # Sample windows\n",
+ " if self.windows_batch_size is not None:\n",
+ " n_windows = windows.shape[0]\n",
+ " w_idxs = np.random.choice(n_windows, \n",
+ " size=self.windows_batch_size,\n",
+ " replace=(n_windows < self.windows_batch_size))\n",
+ " windows = windows[w_idxs]\n",
+ " \n",
+ " if static is not None and not self.MULTIVARIATE:\n",
+ " static = static[w_idxs]\n",
+ "\n",
+ " windows_batch = dict(temporal=windows,\n",
+ " temporal_cols=temporal_cols,\n",
+ " static=static,\n",
+ " static_cols=static_cols)\n",
+ " return windows_batch\n",
+ "\n",
+ " elif step in ['predict', 'val']:\n",
+ "\n",
+ " if step == 'predict':\n",
+ " initial_input = temporal.shape[-1] - self.test_size\n",
+ " if initial_input <= self.input_size: # There is not enough data to predict first timestamp\n",
+ " temporal = F.pad(temporal, pad=(self.input_size-initial_input, 0), mode=\"constant\", value=0.0)\n",
+ " predict_step_size = self.predict_step_size\n",
+ " cutoff = - self.input_size - self.test_size\n",
+ " temporal = temporal[:, :, cutoff:]\n",
+ "\n",
+ " elif step == 'val':\n",
+ " predict_step_size = self.step_size\n",
+ " cutoff = -self.input_size - self.val_size - self.test_size\n",
+ " if self.test_size > 0:\n",
+ " temporal = batch['temporal'][:, :, cutoff:-self.test_size]\n",
+ " else:\n",
+ " temporal = batch['temporal'][:, :, cutoff:]\n",
+ " if temporal.shape[-1] < window_size:\n",
+ " initial_input = temporal.shape[-1] - self.val_size\n",
+ " temporal = F.pad(temporal, pad=(self.input_size-initial_input, 0), mode=\"constant\", value=0.0)\n",
+ "\n",
+ " if (step=='predict') and (self.test_size==0) and (len(self.futr_exog_list)==0):\n",
+ " temporal = F.pad(temporal, pad=(0, self.h), mode=\"constant\", value=0.0)\n",
+ "\n",
+ " windows = temporal.unfold(dimension=-1,\n",
+ " size=window_size,\n",
+ " step=predict_step_size)\n",
+ "\n",
+ " static = batch.get('static', None)\n",
+ " static_cols=batch.get('static_cols', None)\n",
+ "\n",
+ " if self.MULTIVARIATE:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws, L + h, C, n_series]\n",
+ " windows = windows.permute(2, 3, 1, 0)\n",
+ " else:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws * n_series, L + h, C, 1]\n",
+ " windows_per_serie = windows.shape[2]\n",
+ " windows = windows.permute(0, 2, 3, 1)\n",
+ " windows = windows.flatten(0, 1)\n",
+ " windows = windows.unsqueeze(-1)\n",
+ " if static is not None:\n",
+ " static = torch.repeat_interleave(static, \n",
+ " repeats=windows_per_serie, dim=0)\n",
+ "\n",
+ " # Sample windows for batched prediction\n",
+ " if w_idxs is not None:\n",
+ " windows = windows[w_idxs]\n",
+ " if static is not None and not self.MULTIVARIATE:\n",
+ " static = static[w_idxs]\n",
+ "\n",
+ " windows_batch = dict(temporal=windows,\n",
+ " temporal_cols=temporal_cols,\n",
+ " static=static,\n",
+ " static_cols=static_cols)\n",
+ " return windows_batch\n",
+ " else:\n",
+ " raise ValueError(f'Unknown step {step}') \n",
+ "\n",
+ " def _normalization(self, windows, y_idx):\n",
+ " # windows are already split into train/validation/test\n",
+ " # by `_create_windows`, so there is no leakage risk\n",
+ " temporal = windows['temporal'] # [Ws, L + h, C, n_series]\n",
+ " temporal_cols = windows['temporal_cols'].copy() # [Ws, L + h, C, n_series]\n",
+ "\n",
+ " # To avoid leakage, use only the lags\n",
+ " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
+ " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
+ " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
+ " temporal_data = temporal[:, :, temporal_idxs] \n",
+ " temporal_mask = temporal[:, :, temporal_cols.get_loc('available_mask')].clone()\n",
+ " if self.h > 0:\n",
+ " temporal_mask[:, -self.h:] = 0.0\n",
+ "\n",
+ " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
+ " temporal_mask = temporal_mask.unsqueeze(2) # Add channel dimension for scaler.transform.\n",
+ " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
+ "\n",
+ " # Replace values in windows dict\n",
+ " temporal[:, :, temporal_idxs] = temporal_data\n",
+ " windows['temporal'] = temporal\n",
+ "\n",
+ " return windows\n",
+ "\n",
+ " def _inv_normalization(self, y_hat, y_idx):\n",
+ " # Receives window predictions [Ws, h, output, n_series]\n",
+ " # Broadcasts scale if necessary and inverts normalization\n",
+ " add_channel_dim = y_hat.ndim > 3\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx, add_channel_dim=add_channel_dim)\n",
+ " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
+ "\n",
+ " return y_hat\n",
+ "\n",
+ " def _parse_windows(self, batch, windows):\n",
+ " # windows: [Ws, L + h, C, n_series]\n",
+ "\n",
+ " # Filter insample lags from outsample horizon\n",
+ " y_idx = batch['y_idx']\n",
+ " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
+ "\n",
+ " insample_y = windows['temporal'][:, :self.input_size, y_idx]\n",
+ " insample_mask = windows['temporal'][:, :self.input_size, mask_idx]\n",
+ "\n",
+ " # Declare additional information\n",
+ " outsample_y = None\n",
+ " outsample_mask = None\n",
+ " hist_exog = None\n",
+ " futr_exog = None\n",
+ " stat_exog = None\n",
+ "\n",
+ " if self.h > 0:\n",
+ " outsample_y = windows['temporal'][:, self.input_size:, y_idx]\n",
+ " outsample_mask = windows['temporal'][:, self.input_size:, mask_idx]\n",
+ "\n",
+ " # Recurrent models at t predict t+1, so we shift the input (insample_y) by one\n",
+ " if self.RECURRENT:\n",
+ " insample_y = torch.cat((insample_y, outsample_y[:, :-1]), dim=1)\n",
+ " insample_mask = torch.cat((insample_mask, outsample_mask[:, :-1]), dim=1)\n",
+ " self.maintain_state = False\n",
+ "\n",
+ " if len(self.hist_exog_list):\n",
+ " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
+ " if self.RECURRENT:\n",
+ " hist_exog = windows['temporal'][:, :, hist_exog_idx]\n",
+ " hist_exog[:, self.input_size:] = 0.0\n",
+ " hist_exog = hist_exog[:, 1:]\n",
+ " else:\n",
+ " hist_exog = windows['temporal'][:, :self.input_size, hist_exog_idx]\n",
+ " if not self.MULTIVARIATE:\n",
+ " hist_exog = hist_exog.squeeze(-1)\n",
+ " else:\n",
+ " hist_exog = hist_exog.swapaxes(1, 2)\n",
+ "\n",
+ " if len(self.futr_exog_list):\n",
+ " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
+ " futr_exog = windows['temporal'][:, :, futr_exog_idx]\n",
+ " if self.RECURRENT:\n",
+ " futr_exog = futr_exog[:, 1:]\n",
+ " if not self.MULTIVARIATE:\n",
+ " futr_exog = futr_exog.squeeze(-1)\n",
+ " else:\n",
+ " futr_exog = futr_exog.swapaxes(1, 2) \n",
+ "\n",
+ " if len(self.stat_exog_list):\n",
+ " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
+ " stat_exog = windows['static'][:, static_idx]\n",
+ "\n",
+ " # TODO: think of a better way of removing insample_y features\n",
+ " if self.exclude_insample_y:\n",
+ " insample_y = insample_y * 0\n",
+ "\n",
+ " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
+ " hist_exog, futr_exog, stat_exog \n",
+ "\n",
+ " def _get_loc_scale(self, y_idx, add_channel_dim=False):\n",
+ " # [B, L, C, n_series] -> [B, L, n_series]\n",
+ " y_scale = self.scaler.x_scale[:, :, y_idx]\n",
+ " y_loc = self.scaler.x_shift[:, :, y_idx]\n",
+ " \n",
+ " # [B, L, n_series] -> [B, L, n_series, 1]\n",
+ " if add_channel_dim:\n",
+ " y_scale = y_scale.unsqueeze(-1)\n",
+ " y_loc = y_loc.unsqueeze(-1)\n",
+ "\n",
+ " return y_loc, y_scale\n",
+ "\n",
+ " def _compute_valid_loss(self, insample_y, outsample_y, output, outsample_mask, y_idx):\n",
+ " if self.loss.is_distribution_output:\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
+ " if isinstance(self.valid_loss, (losses.sCRPS, losses.MQLoss, losses.HuberMQLoss)):\n",
+ " _, _, quants = self.loss.sample(distr_args=distr_args) \n",
+ " output = quants\n",
+ " elif isinstance(self.valid_loss, losses.BasePointLoss):\n",
+ " distr = self.loss.get_distribution(distr_args=distr_args)\n",
+ " output = distr.mean\n",
+ "\n",
+ " # Validation Loss evaluation\n",
+ " if self.valid_loss.is_distribution_output:\n",
+ " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
+ " else:\n",
+ " output = self._inv_normalization(y_hat=output, y_idx=y_idx)\n",
+ " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, y_insample=insample_y, mask=outsample_mask)\n",
+ " return valid_loss\n",
+ " \n",
+ " def _validate_step_recurrent_batch(self, insample_y, insample_mask, futr_exog, hist_exog, stat_exog, y_idx):\n",
+ " # Remember state in network and set horizon to 1\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = True\n",
+ " self.h = 1\n",
+ "\n",
+ " # Initialize results array\n",
+ " n_outputs = self.loss.outputsize_multiplier\n",
+ " y_hat = torch.zeros((insample_y.shape[0],\n",
+ " self.horizon_backup,\n",
+ " self.n_series * n_outputs),\n",
+ " device=insample_y.device,\n",
+ " dtype=insample_y.dtype)\n",
+ "\n",
+ " # First step prediction\n",
+ " tau = 0\n",
+ " \n",
+ " # Set exogenous\n",
+ " hist_exog_current = None\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " futr_exog_current = None\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " # First forecast step\n",
+ " y_hat[:, tau], insample_y = self._validate_step_recurrent_single(\n",
+ " insample_y=insample_y[:, :self.input_size + tau - 1],\n",
+ " insample_mask=insample_mask[:, :self.input_size + tau - 1],\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx,\n",
+ " )\n",
+ "\n",
+ " # Horizon prediction recursively\n",
+ " for tau in range(self.horizon_backup):\n",
+ " # Set exogenous\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ "\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ " \n",
+ " y_hat[:, tau], insample_y = self._validate_step_recurrent_single(\n",
+ " insample_y=insample_y,\n",
+ " insample_mask=None,\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx = y_idx,\n",
+ " )\n",
+ " \n",
+ " # Reset state and horizon\n",
+ " self.maintain_state = False\n",
+ " self.rnn_state = None\n",
+ " self.h = self.horizon_backup\n",
+ "\n",
+ " return y_hat \n",
+ "\n",
+ " def _validate_step_recurrent_single(self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx):\n",
+ " # Input sequence\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output_batch_unmapped = self(windows_batch)\n",
+ " output_batch = self.loss.domain_map(output_batch_unmapped)\n",
+ " \n",
+ " # Inverse normalization and sampling\n",
+ " if self.loss.is_distribution_output:\n",
+ " # Sample distribution\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
+ " # When validating, the output is the mean of the distribution which is an attribute\n",
+ " distr = self.loss.get_distribution(distr_args=distr_args)\n",
+ "\n",
+ " # Scale back to feed back as input\n",
+ " insample_y = self.scaler.scaler(distr.mean, y_loc, y_scale)\n",
+ " else:\n",
+ " # Todo: for now, we assume that in case of a BasePointLoss with ndim==4, the last dimension\n",
+ " # contains a set of predictions for the target (e.g. MQLoss multiple quantiles), for which we use the \n",
+ " # mean as feedback signal for the recurrent predictions. A more precise way is to increase the\n",
+ " # insample input size of the recurrent network by the number of outputs so that each output\n",
+ " # can be fed back to a specific input channel. \n",
+ " if output_batch.ndim == 4:\n",
+ " output_batch = output_batch.mean(dim=-1)\n",
+ "\n",
+ " insample_y = output_batch\n",
+ "\n",
+ " # Remove horizon dim: [B, 1, N * n_outputs] -> [B, N * n_outputs]\n",
+ " y_hat = output_batch_unmapped.squeeze(1)\n",
+ " return y_hat, insample_y\n",
+ "\n",
+ " def _predict_step_recurrent_batch(self, insample_y, insample_mask, futr_exog, hist_exog, stat_exog, y_idx):\n",
+ " # Remember state in network and set horizon to 1\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = True\n",
+ " self.h = 1\n",
+ "\n",
+ " # Initialize results array\n",
+ " n_outputs = len(self.loss.output_names)\n",
+ " y_hat = torch.zeros((insample_y.shape[0],\n",
+ " self.horizon_backup,\n",
+ " self.n_series,\n",
+ " n_outputs),\n",
+ " device=insample_y.device,\n",
+ " dtype=insample_y.dtype)\n",
+ "\n",
+ " # First step prediction\n",
+ " tau = 0\n",
+ " \n",
+ " # Set exogenous\n",
+ " hist_exog_current = None\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " futr_exog_current = None\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " # First forecast step\n",
+ " y_hat[:, tau], insample_y = self._predict_step_recurrent_single(\n",
+ " insample_y=insample_y[:, :self.input_size + tau - 1],\n",
+ " insample_mask=insample_mask[:, :self.input_size + tau - 1],\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx,\n",
+ " )\n",
+ "\n",
+ " # Horizon prediction recursively\n",
+ " for tau in range(self.horizon_backup):\n",
+ " # Set exogenous\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ "\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ " \n",
+ " y_hat[:, tau], insample_y = self._predict_step_recurrent_single(\n",
+ " insample_y=insample_y,\n",
+ " insample_mask=None,\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx = y_idx,\n",
+ " )\n",
+ " \n",
+ " # Reset state and horizon\n",
+ " self.maintain_state = False\n",
+ " self.rnn_state = None\n",
+ " self.h = self.horizon_backup\n",
+ "\n",
+ " # Squeeze for univariate case\n",
+ " if not self.MULTIVARIATE:\n",
+ " y_hat = y_hat.squeeze(2)\n",
+ "\n",
+ " return y_hat \n",
+ "\n",
+ " def _predict_step_recurrent_single(self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx):\n",
+ " # Input sequence\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output_batch_unmapped = self(windows_batch)\n",
+ " output_batch = self.loss.domain_map(output_batch_unmapped)\n",
+ " \n",
+ " # Inverse normalization and sampling\n",
+ " if self.loss.is_distribution_output:\n",
+ " # Sample distribution\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
+ " # When predicting, we need to sample to get the quantiles. The mean is an attribute.\n",
+ " _, _, quants = self.loss.sample(distr_args=distr_args, num_samples=self.n_samples)\n",
+ " mean = self.loss.distr_mean\n",
+ "\n",
+ " # Scale back to feed back as input\n",
+ " insample_y = self.scaler.scaler(mean, y_loc, y_scale)\n",
+ " \n",
+ " # Save predictions\n",
+ " y_hat = torch.concat((mean.unsqueeze(-1), quants), axis=-1)\n",
+ "\n",
+ " if self.loss.return_params:\n",
+ " distr_args = torch.stack(distr_args, dim=-1)\n",
+ " if distr_args.ndim > 4:\n",
+ " distr_args = distr_args.flatten(-2, -1)\n",
+ " y_hat = torch.concat((y_hat, distr_args), axis=-1)\n",
+ " else:\n",
+ " # Todo: for now, we assume that in case of a BasePointLoss with ndim==4, the last dimension\n",
+ " # contains a set of predictions for the target (e.g. MQLoss multiple quantiles), for which we use the \n",
+ " # mean as feedback signal for the recurrent predictions. A more precise way is to increase the\n",
+ " # insample input size of the recurrent network by the number of outputs so that each output\n",
+ " # can be fed back to a specific input channel. \n",
+ " if output_batch.ndim == 4:\n",
+ " output_batch = output_batch.mean(dim=-1)\n",
+ "\n",
+ " insample_y = output_batch\n",
+ " y_hat = self._inv_normalization(y_hat=output_batch, y_idx=y_idx)\n",
+ " y_hat = y_hat.unsqueeze(-1)\n",
+ "\n",
+ " # Remove horizon dim: [B, 1, N, n_outputs] -> [B, N, n_outputs]\n",
+ " y_hat = y_hat.squeeze(1)\n",
+ " return y_hat, insample_y\n",
+ "\n",
+ " def _predict_step_direct_batch(self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx):\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output_batch = self(windows_batch)\n",
+ " output_batch = self.loss.domain_map(output_batch)\n",
+ "\n",
+ " # Inverse normalization and sampling\n",
+ " if self.loss.is_distribution_output:\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
+ " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
+ " y_hat = torch.concat((sample_mean, quants), axis=-1)\n",
+ "\n",
+ " if self.loss.return_params:\n",
+ " distr_args = torch.stack(distr_args, dim=-1)\n",
+ " if distr_args.ndim > 4:\n",
+ " distr_args = distr_args.flatten(-2, -1)\n",
+ " y_hat = torch.concat((y_hat, distr_args), axis=-1) \n",
+ " else:\n",
+ " y_hat = self._inv_normalization(y_hat=output_batch, \n",
+ " y_idx=y_idx)\n",
+ "\n",
+ " return y_hat\n",
+ " \n",
+ " def training_step(self, batch, batch_idx):\n",
+ " # Set horizon to h_train in case of recurrent model to speed up training\n",
+ " if self.RECURRENT:\n",
+ " self.h = self.h_train\n",
+ " \n",
+ " # windows: [Ws, L + h, C, n_series] or [Ws, L + h, C]\n",
+ " y_idx = batch['y_idx']\n",
+ "\n",
+ " windows = self._create_windows(batch, step='train')\n",
+ " original_outsample_y = torch.clone(windows['temporal'][:, self.input_size:, y_idx])\n",
+ " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
+ " \n",
+ " # Parse windows\n",
+ " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
+ " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
+ "\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output = self(windows_batch)\n",
+ " output = self.loss.domain_map(output)\n",
+ " \n",
+ " if self.loss.is_distribution_output:\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " outsample_y = original_outsample_y\n",
+ " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
+ " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
+ " else:\n",
+ " loss = self.loss(y=outsample_y, y_hat=output, y_insample=insample_y, mask=outsample_mask)\n",
+ "\n",
+ " if torch.isnan(loss):\n",
+ " print('Model Parameters', self.hparams)\n",
+ " print('insample_y', torch.isnan(insample_y).sum())\n",
+ " print('outsample_y', torch.isnan(outsample_y).sum())\n",
+ " raise Exception('Loss is NaN, training stopped.')\n",
+ "\n",
+ " train_loss_log = loss.detach().item()\n",
+ " self.log(\n",
+ " 'train_loss',\n",
+ " train_loss_log,\n",
+ " batch_size=outsample_y.size(0),\n",
+ " prog_bar=True,\n",
+ " on_epoch=True,\n",
+ " )\n",
+ " self.train_trajectories.append((self.global_step, train_loss_log))\n",
+ "\n",
+ " self.h = self.horizon_backup\n",
+ "\n",
+ " return loss\n",
+ "\n",
+ "\n",
+ " def validation_step(self, batch, batch_idx):\n",
+ " if self.val_size == 0:\n",
+ " return np.nan\n",
+ "\n",
+ " # TODO: Hack to compute number of windows\n",
+ " windows = self._create_windows(batch, step='val')\n",
+ " n_windows = len(windows['temporal'])\n",
+ " y_idx = batch['y_idx']\n",
+ "\n",
+ " # Number of windows in batch\n",
+ " windows_batch_size = self.inference_windows_batch_size\n",
+ " if windows_batch_size < 0:\n",
+ " windows_batch_size = n_windows\n",
+ " n_batches = int(np.ceil(n_windows / windows_batch_size))\n",
+ "\n",
+ " valid_losses = []\n",
+ " batch_sizes = []\n",
+ " for i in range(n_batches):\n",
+ " # Create and normalize windows [Ws, L + h, C, n_series]\n",
+ " w_idxs = np.arange(i*windows_batch_size, \n",
+ " min((i+1)*windows_batch_size, n_windows))\n",
+ " windows = self._create_windows(batch, step='val', w_idxs=w_idxs)\n",
+ " original_outsample_y = torch.clone(windows['temporal'][:, self.input_size:, y_idx])\n",
+ "\n",
+ " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
+ "\n",
+ " # Parse windows\n",
+ " insample_y, insample_mask, _, outsample_mask, \\\n",
+ " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
+ "\n",
+ " if self.RECURRENT:\n",
+ " output_batch = self._validate_step_recurrent_batch(insample_y=insample_y,\n",
+ " insample_mask=insample_mask,\n",
+ " futr_exog=futr_exog,\n",
+ " hist_exog=hist_exog,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx)\n",
+ " else: \n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ " \n",
+ " # Model Predictions\n",
+ " output_batch = self(windows_batch) \n",
+ "\n",
+ " output_batch = self.loss.domain_map(output_batch)\n",
+ " valid_loss_batch = self._compute_valid_loss(insample_y=insample_y,\n",
+ " outsample_y=original_outsample_y,\n",
+ " output=output_batch, \n",
+ " outsample_mask=outsample_mask,\n",
+ " y_idx=batch['y_idx'])\n",
+ " valid_losses.append(valid_loss_batch)\n",
+ " batch_sizes.append(len(output_batch))\n",
+ " \n",
+ " valid_loss = torch.stack(valid_losses)\n",
+ " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n",
+ " batch_size = torch.sum(batch_sizes)\n",
+ " valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size\n",
+ "\n",
+ " if torch.isnan(valid_loss):\n",
+ " raise Exception('Loss is NaN, training stopped.')\n",
+ "\n",
+ " valid_loss_log = valid_loss.detach()\n",
+ " self.log(\n",
+ " 'valid_loss',\n",
+ " valid_loss_log.item(),\n",
+ " batch_size=batch_size,\n",
+ " prog_bar=True,\n",
+ " on_epoch=True,\n",
+ " )\n",
+ " self.validation_step_outputs.append(valid_loss_log)\n",
+ " return valid_loss\n",
+ "\n",
+ " def predict_step(self, batch, batch_idx):\n",
+ " if self.RECURRENT:\n",
+ " self.input_size = self.inference_input_size\n",
+ "\n",
+ " # TODO: Hack to compute number of windows\n",
+ " windows = self._create_windows(batch, step='predict')\n",
+ " n_windows = len(windows['temporal'])\n",
+ " y_idx = batch['y_idx']\n",
+ "\n",
+ " # Number of windows in batch\n",
+ " windows_batch_size = self.inference_windows_batch_size\n",
+ " if windows_batch_size < 0:\n",
+ " windows_batch_size = n_windows\n",
+ " n_batches = int(np.ceil(n_windows / windows_batch_size))\n",
+ " y_hats = []\n",
+ " for i in range(n_batches):\n",
+ " # Create and normalize windows [Ws, L+H, C]\n",
+ " w_idxs = np.arange(i*windows_batch_size, \n",
+ " min((i+1)*windows_batch_size, n_windows))\n",
+ " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n",
+ " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
+ "\n",
+ " # Parse windows\n",
+ " insample_y, insample_mask, _, _, \\\n",
+ " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
+ "\n",
+ " if self.RECURRENT: \n",
+ " y_hat = self._predict_step_recurrent_batch(insample_y=insample_y,\n",
+ " insample_mask=insample_mask,\n",
+ " futr_exog=futr_exog,\n",
+ " hist_exog=hist_exog,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx)\n",
+ " else:\n",
+ " y_hat = self._predict_step_direct_batch(insample_y=insample_y,\n",
+ " insample_mask=insample_mask,\n",
+ " futr_exog=futr_exog,\n",
+ " hist_exog=hist_exog,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx) \n",
+ "\n",
+ "\n",
+ " y_hats.append(y_hat)\n",
+ " y_hat = torch.cat(y_hats, dim=0)\n",
+ " self.input_size = self.input_size_backup\n",
+ "\n",
+ " return y_hat\n",
+ " \n",
+ " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
+ " \"\"\" Fit.\n",
+ "\n",
+ " The `fit` method optimizes the neural network's weights using the\n",
+ " initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
+ " and the `loss` function as defined during the initialization. \n",
+ " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
+ " inherits the initialization's `self.trainer_kwargs`, to customize\n",
+ " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
+ "\n",
+ " The method is designed to be compatible with SKLearn-like classes\n",
+ " and in particular to be compatible with the StatsForecast library.\n",
+ "\n",
+ " By default the `model` does not save training checkpoints, in order to save\n",
+ " disk space; to enable them, set `enable_checkpointing=True` in `__init__`.\n",
+ "\n",
+ " **Parameters:**<br>\n",
+ " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>\n",
+ " `val_size`: int, validation size for temporal cross-validation.<br>\n",
+ " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>\n",
+ " `test_size`: int, test size for temporal cross-validation.<br>\n",
+ " \"\"\"\n",
+ " return self._fit(\n",
+ " dataset=dataset,\n",
+ " batch_size=self.batch_size,\n",
+ " valid_batch_size=self.valid_batch_size,\n",
+ " val_size=val_size,\n",
+ " test_size=test_size,\n",
+ " random_seed=random_seed,\n",
+ " distributed_config=distributed_config,\n",
+ " )\n",
+ "\n",
+ " def predict(self, dataset, test_size=None, step_size=1,\n",
+ " random_seed=None, quantiles=None, **data_module_kwargs):\n",
+ " \"\"\" Predict.\n",
+ "\n",
+ " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
+ "\n",
+ " **Parameters:**<br>\n",
+ " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>\n",
+ " `test_size`: int=None, test size for temporal cross-validation.<br>\n",
+ " `step_size`: int=1, Step size between each window.<br>\n",
+ " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>\n",
+ " `quantiles`: list of floats, optional (default=None), target quantiles to predict.<br>\n",
+ " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
+ " \"\"\"\n",
+ " self._check_exog(dataset)\n",
+ " self._restart_seed(random_seed)\n",
+ " if \"quantile\" in data_module_kwargs:\n",
+ " warnings.warn(\"The 'quantile' argument will be deprecated, use 'quantiles' instead.\")\n",
+ " if quantiles is not None:\n",
+ " raise ValueError(\"You can't specify quantile and quantiles.\")\n",
+ " quantiles = [data_module_kwargs.pop(\"quantile\")]\n",
+ " self._set_quantiles(quantiles)\n",
+ "\n",
+ " self.predict_step_size = step_size\n",
+ " self.decompose_forecast = False\n",
+ " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
+ " valid_batch_size=self.valid_batch_size,\n",
+ " **data_module_kwargs)\n",
+ "\n",
+ " # Protect against the multi-GPU case: PL does not support returning predictions with multiple GPUs.\n",
+ " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
+ " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
+ " pred_trainer_kwargs['devices'] = [0]\n",
+ "\n",
+ " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
+ " fcsts = trainer.predict(self, datamodule=datamodule) \n",
+ " fcsts = torch.vstack(fcsts)\n",
+ "\n",
+ " if self.MULTIVARIATE:\n",
+ " # [B, h, n_series (, Q)] -> [n_series, B, h (, Q)]\n",
+ " fcsts = fcsts.swapaxes(0, 2)\n",
+ " fcsts = fcsts.swapaxes(1, 2)\n",
+ "\n",
+ " fcsts = fcsts.numpy().flatten()\n",
+ " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
+ " return fcsts\n",
+ "\n",
+ " def decompose(self, dataset, step_size=1, random_seed=None, quantiles=None, **data_module_kwargs):\n",
+ " \"\"\" Decompose Predictions.\n",
+ "\n",
+ " Decompose the predictions through the network's layers.\n",
+ " Available methods are `ESRNN`, `NHITS`, `NBEATS`, and `NBEATSx`.\n",
+ "\n",
+ " **Parameters:**<br>\n",
+ " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation here](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>\n",
+ " `step_size`: int=1, step size between each window of temporal data.<br>\n",
+ " `quantiles`: list of floats, optional (default=None), target quantiles to predict.<br>\n",
+ " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
+ " \"\"\"\n",
+ " # Restart random seed\n",
+ " if random_seed is None:\n",
+ " random_seed = self.random_seed\n",
+ " torch.manual_seed(random_seed)\n",
+ " self._set_quantiles(quantiles)\n",
+ "\n",
+ " self.predict_step_size = step_size\n",
+ " self.decompose_forecast = True\n",
+ " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
+ " valid_batch_size=self.valid_batch_size,\n",
+ " **data_module_kwargs)\n",
+ " trainer = pl.Trainer(**self.trainer_kwargs)\n",
+ " fcsts = trainer.predict(self, datamodule=datamodule)\n",
+ " self.decompose_forecast = False # Default decomposition back to false\n",
+ " return torch.vstack(fcsts).numpy() "
]
}
],
diff --git a/nbs/common.base_multivariate.ipynb b/nbs/common.base_multivariate.ipynb
deleted file mode 100644
index f1321600d..000000000
--- a/nbs/common.base_multivariate.ipynb
+++ /dev/null
@@ -1,625 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| default_exp common._base_multivariate"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# BaseMultivariate\n",
- "\n",
- "> The `BaseWindows` class contains standard methods shared across window-based multivariate neural networks; in contrast to recurrent neural networks these models commit to a fixed sequence length input."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The standard methods include data preprocessing `_normalization`, optimization utilities like parameter initialization, `training_step`, `validation_step`, and shared `fit` and `predict` methods.These shared methods enable all the `neuralforecast.models` compatibility with the `core.NeuralForecast` wrapper class. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "import numpy as np\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import pytorch_lightning as pl\n",
- "import neuralforecast.losses.pytorch as losses\n",
- "\n",
- "from neuralforecast.common._base_model import BaseModel\n",
- "from neuralforecast.common._scalers import TemporalNorm\n",
- "from neuralforecast.tsdataset import TimeSeriesDataModule\n",
- "from neuralforecast.utils import get_indexer_raise_missing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class BaseMultivariate(BaseModel):\n",
- " \"\"\" Base Multivariate\n",
- " \n",
- " Base class for all multivariate models. The forecasts for all time-series are produced simultaneously \n",
- " within each window, which are randomly sampled during training.\n",
- " \n",
- " This class implements the basic functionality for all windows-based models, including:\n",
- " - PyTorch Lightning's methods training_step, validation_step, predict_step.<br>\n",
- " - fit and predict methods used by NeuralForecast.core class.<br>\n",
- " - sampling and wrangling methods to generate multivariate windows.\n",
- " \"\"\"\n",
- " def __init__(self, \n",
- " h,\n",
- " input_size,\n",
- " loss,\n",
- " valid_loss,\n",
- " learning_rate,\n",
- " max_steps,\n",
- " val_check_steps,\n",
- " n_series,\n",
- " batch_size,\n",
- " step_size=1,\n",
- " num_lr_decays=0,\n",
- " early_stop_patience_steps=-1,\n",
- " scaler_type='robust',\n",
- " futr_exog_list=None,\n",
- " hist_exog_list=None,\n",
- " stat_exog_list=None,\n",
- " num_workers_loader=0,\n",
- " drop_last_loader=False,\n",
- " random_seed=1, \n",
- " alias=None,\n",
- " optimizer=None,\n",
- " optimizer_kwargs=None,\n",
- " lr_scheduler=None,\n",
- " lr_scheduler_kwargs=None,\n",
- " dataloader_kwargs=None,\n",
- " **trainer_kwargs):\n",
- " super().__init__(\n",
- " random_seed=random_seed,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " optimizer=optimizer,\n",
- " optimizer_kwargs=optimizer_kwargs,\n",
- " lr_scheduler=lr_scheduler,\n",
- " lr_scheduler_kwargs=lr_scheduler_kwargs, \n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
- " max_steps=max_steps,\n",
- " early_stop_patience_steps=early_stop_patience_steps,\n",
- " **trainer_kwargs,\n",
- " )\n",
- "\n",
- " # Padder to complete train windows, \n",
- " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
- " self.h = h\n",
- " self.input_size = input_size\n",
- " self.n_series = n_series\n",
- " self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- "\n",
- " # Multivariate models do not support these loss functions yet.\n",
- " unsupported_losses = (\n",
- " losses.sCRPS,\n",
- " losses.MQLoss,\n",
- " losses.DistributionLoss,\n",
- " losses.PMM,\n",
- " losses.GMM,\n",
- " losses.HuberMQLoss,\n",
- " losses.MASE,\n",
- " losses.relMSE,\n",
- " losses.NBMM,\n",
- " )\n",
- " if isinstance(self.loss, unsupported_losses):\n",
- " raise Exception(f\"{self.loss} is not supported in a Multivariate model.\") \n",
- " if isinstance(self.valid_loss, unsupported_losses):\n",
- " raise Exception(f\"{self.valid_loss} is not supported in a Multivariate model.\") \n",
- "\n",
- " self.batch_size = batch_size\n",
- " \n",
- " # Optimization\n",
- " self.learning_rate = learning_rate\n",
- " self.max_steps = max_steps\n",
- " self.num_lr_decays = num_lr_decays\n",
- " self.lr_decay_steps = max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
- " self.early_stop_patience_steps = early_stop_patience_steps\n",
- " self.val_check_steps = val_check_steps\n",
- " self.step_size = step_size\n",
- "\n",
- " # Scaler\n",
- " self.scaler = TemporalNorm(scaler_type=scaler_type, dim=2) # Time dimension is in the second axis\n",
- "\n",
- " # Fit arguments\n",
- " self.val_size = 0\n",
- " self.test_size = 0\n",
- "\n",
- " # Model state\n",
- " self.decompose_forecast = False\n",
- "\n",
- " # DataModule arguments\n",
- " self.num_workers_loader = num_workers_loader\n",
- " self.dataloader_kwargs = dataloader_kwargs\n",
- " self.drop_last_loader = drop_last_loader\n",
- " # used by on_validation_epoch_end hook\n",
- " self.validation_step_outputs = []\n",
- " self.alias = alias\n",
- "\n",
- " def _create_windows(self, batch, step):\n",
- " # Parse common data\n",
- " window_size = self.input_size + self.h\n",
- " temporal_cols = batch['temporal_cols']\n",
- " temporal = batch['temporal']\n",
- "\n",
- " if step == 'train':\n",
- " if self.val_size + self.test_size > 0:\n",
- " cutoff = -self.val_size - self.test_size\n",
- " temporal = temporal[:, :, :cutoff]\n",
- "\n",
- " temporal = self.padder(temporal)\n",
- " windows = temporal.unfold(dimension=-1, \n",
- " size=window_size, \n",
- " step=self.step_size)\n",
- " # [n_series, C, Ws, L+H] 0, 1, 2, 3\n",
- "\n",
- " # Sample and Available conditions\n",
- " available_idx = temporal_cols.get_loc('available_mask')\n",
- " sample_condition = windows[:, available_idx, :, -self.h:]\n",
- " sample_condition = torch.sum(sample_condition, axis=2) # Sum over time\n",
- " sample_condition = torch.sum(sample_condition, axis=0) # Sum over time-series\n",
- " available_condition = windows[:, available_idx, :, :-self.h]\n",
- " available_condition = torch.sum(available_condition, axis=2) # Sum over time\n",
- " available_condition = torch.sum(available_condition, axis=0) # Sum over time-series\n",
- " final_condition = (sample_condition > 0) & (available_condition > 0) # Of shape [Ws]\n",
- " windows = windows[:, :, final_condition, :]\n",
- "\n",
- " # Get Static data\n",
- " static = batch.get('static', None)\n",
- " static_cols = batch.get('static_cols', None)\n",
- "\n",
- " # Protection of empty windows\n",
- " if final_condition.sum() == 0:\n",
- " raise Exception('No windows available for training')\n",
- "\n",
- " # Sample windows\n",
- " n_windows = windows.shape[2]\n",
- " if self.batch_size is not None:\n",
- " w_idxs = np.random.choice(n_windows, \n",
- " size=self.batch_size,\n",
- " replace=(n_windows < self.batch_size))\n",
- " windows = windows[:, :, w_idxs, :]\n",
- "\n",
- " windows = windows.permute(2, 1, 3, 0) # [Ws, C, L+H, n_series]\n",
- "\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- "\n",
- " return windows_batch\n",
- "\n",
- " elif step in ['predict', 'val']:\n",
- "\n",
- " if step == 'predict':\n",
- " predict_step_size = self.predict_step_size\n",
- " cutoff = - self.input_size - self.test_size\n",
- " temporal = batch['temporal'][:, :, cutoff:]\n",
- "\n",
- " elif step == 'val':\n",
- " predict_step_size = self.step_size\n",
- " cutoff = -self.input_size - self.val_size - self.test_size\n",
- " if self.test_size > 0:\n",
- " temporal = batch['temporal'][:, :, cutoff:-self.test_size]\n",
- " else:\n",
- " temporal = batch['temporal'][:, :, cutoff:]\n",
- "\n",
- " if (step=='predict') and (self.test_size==0) and (len(self.futr_exog_list)==0):\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " windows = temporal.unfold(dimension=-1,\n",
- " size=window_size,\n",
- " step=predict_step_size)\n",
- " # [n_series, C, Ws, L+H] -> [Ws, C, L+H, n_series]\n",
- " windows = windows.permute(2, 1, 3, 0)\n",
- "\n",
- " # Get Static data\n",
- " static = batch.get('static', None)\n",
- " static_cols=batch.get('static_cols', None)\n",
- "\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- "\n",
- "\n",
- " return windows_batch\n",
- " else:\n",
- " raise ValueError(f'Unknown step {step}') \n",
- "\n",
- " def _normalization(self, windows, y_idx):\n",
- " \n",
- " # windows are already filtered by train/validation/test\n",
- " # from the `create_windows_method` nor leakage risk\n",
- " temporal = windows['temporal'] # [Ws, C, L+H, n_series]\n",
- " temporal_cols = windows['temporal_cols'].copy() # [Ws, C, L+H, n_series]\n",
- "\n",
- " # To avoid leakage uses only the lags\n",
- " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
- " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
- " temporal_data = temporal[:, temporal_idxs, :, :]\n",
- " temporal_mask = temporal[:, temporal_cols.get_loc('available_mask'), :, :].clone()\n",
- " temporal_mask[:, -self.h:, :] = 0.0\n",
- "\n",
- " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
- " temporal_mask = temporal_mask.unsqueeze(1) # Add channel dimension for scaler.transform.\n",
- " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
- " # Replace values in windows dict\n",
- " temporal[:, temporal_idxs, :, :] = temporal_data\n",
- " windows['temporal'] = temporal\n",
- "\n",
- " return windows\n",
- "\n",
- " def _inv_normalization(self, y_hat, temporal_cols, y_idx):\n",
- " # Receives window predictions [Ws, H, n_series]\n",
- " # Broadcasts outputs and inverts normalization\n",
- "\n",
- " # Add C dimension\n",
- " # if y_hat.ndim == 2:\n",
- " # remove_dimension = True\n",
- " # y_hat = y_hat.unsqueeze(-1)\n",
- " # else:\n",
- " # remove_dimension = False\n",
- " \n",
- " y_scale = self.scaler.x_scale[:, [y_idx], :].squeeze(1)\n",
- " y_loc = self.scaler.x_shift[:, [y_idx], :].squeeze(1)\n",
- "\n",
- " # y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1)\n",
- " # y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1)\n",
- "\n",
- " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
- "\n",
- " # if remove_dimension:\n",
- " # y_hat = y_hat.squeeze(-1)\n",
- " # y_loc = y_loc.squeeze(-1)\n",
- " # y_scale = y_scale.squeeze(-1)\n",
- "\n",
- " return y_hat, y_loc, y_scale\n",
- "\n",
- " def _parse_windows(self, batch, windows):\n",
- " # Temporal: [Ws, C, L+H, n_series]\n",
- "\n",
- " # Filter insample lags from outsample horizon\n",
- " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
- " y_idx = batch['y_idx'] \n",
- " insample_y = windows['temporal'][:, y_idx, :-self.h, :]\n",
- " insample_mask = windows['temporal'][:, mask_idx, :-self.h, :]\n",
- " outsample_y = windows['temporal'][:, y_idx, -self.h:, :]\n",
- " outsample_mask = windows['temporal'][:, mask_idx, -self.h:, :]\n",
- "\n",
- " # Filter historic exogenous variables\n",
- " if len(self.hist_exog_list):\n",
- " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
- " hist_exog = windows['temporal'][:, hist_exog_idx, :-self.h, :]\n",
- " else:\n",
- " hist_exog = None\n",
- " \n",
- " # Filter future exogenous variables\n",
- " if len(self.futr_exog_list):\n",
- " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
- " futr_exog = windows['temporal'][:, futr_exog_idx, :, :]\n",
- " else:\n",
- " futr_exog = None\n",
- "\n",
- " # Filter static variables\n",
- " if len(self.stat_exog_list):\n",
- " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
- " stat_exog = windows['static'][:, static_idx]\n",
- " else:\n",
- " stat_exog = None\n",
- "\n",
- " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog\n",
- "\n",
- " def training_step(self, batch, batch_idx): \n",
- " # Create and normalize windows [batch_size, n_series, C, L+H]\n",
- " windows = self._create_windows(batch, step='train')\n",
- " y_idx = batch['y_idx']\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
- " insample_mask=insample_mask, # [Ws, L, n_series]\n",
- " futr_exog=futr_exog, # [Ws, F, L + h, n_series]\n",
- " hist_exog=hist_exog, # [Ws, X, L, n_series]\n",
- " stat_exog=stat_exog) # [n_series, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(loss):\n",
- " print('Model Parameters', self.hparams)\n",
- " print('insample_y', torch.isnan(insample_y).sum())\n",
- " print('outsample_y', torch.isnan(outsample_y).sum())\n",
- " print('output', torch.isnan(output).sum())\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'train_loss',\n",
- " loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
- " return loss\n",
- "\n",
- " def validation_step(self, batch, batch_idx):\n",
- " if self.val_size == 0:\n",
- " return np.nan\n",
- " \n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " windows = self._create_windows(batch, step='val')\n",
- " y_idx = batch['y_idx']\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
- " insample_mask=insample_mask, # [Ws, L, n_series]\n",
- " futr_exog=futr_exog, # [Ws, F, L + h, n_series]\n",
- " hist_exog=hist_exog, # [Ws, X, L, n_series]\n",
- " stat_exog=stat_exog) # [n_series, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- "\n",
- " if str(type(self.valid_loss)) in\\\n",
- " [\"\", \"\"]:\n",
- " _, output = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " # Validation Loss evaluation\n",
- " if self.valid_loss.is_distribution_output:\n",
- " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(valid_loss):\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'valid_loss',\n",
- " valid_loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.validation_step_outputs.append(valid_loss)\n",
- " return valid_loss\n",
- "\n",
- " def predict_step(self, batch, batch_idx): \n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " windows = self._create_windows(batch, step='predict')\n",
- " y_idx = batch['y_idx'] \n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
- " insample_mask=insample_mask, # [Ws, L, n_series]\n",
- " futr_exog=futr_exog, # [Ws, F, L + h, n_series]\n",
- " hist_exog=hist_exog, # [Ws, X, L, n_series]\n",
- " stat_exog=stat_exog) # [n_series, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=torch.empty(size=(insample_y.shape[0], \n",
- " self.h, \n",
- " self.n_series),\n",
- " dtype=output[0].dtype,\n",
- " device=output[0].device),\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, y_hat = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " if self.loss.return_params:\n",
- " distr_args = torch.stack(distr_args, dim=-1)\n",
- " distr_args = torch.reshape(distr_args, (len(windows[\"temporal\"]), self.h, -1))\n",
- " y_hat = torch.concat((y_hat, distr_args), axis=2)\n",
- " else:\n",
- " y_hat, _, _ = self._inv_normalization(y_hat=output,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " return y_hat\n",
- " \n",
- " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
- " \"\"\" Fit.\n",
- "\n",
- " The `fit` method, optimizes the neural network's weights using the\n",
- " initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
- " and the `loss` function as defined during the initialization. \n",
- " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
- " inherits the initialization's `self.trainer_kwargs`, to customize\n",
- " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
- "\n",
- " The method is designed to be compatible with SKLearn-like classes\n",
- " and in particular to be compatible with the StatsForecast library.\n",
- "\n",
- " By default the `model` is not saving training checkpoints to protect \n",
- " disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
- "\n",
- " **Parameters:**
\n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
- " `val_size`: int, validation size for temporal cross-validation.
\n",
- " `test_size`: int, test size for temporal cross-validation.
\n",
- " \"\"\"\n",
- " if distributed_config is not None:\n",
- " raise ValueError(\"multivariate models cannot be trained using distributed data parallel.\")\n",
- " return self._fit(\n",
- " dataset=dataset,\n",
- " batch_size=self.n_series,\n",
- " valid_batch_size=self.n_series,\n",
- " val_size=val_size,\n",
- " test_size=test_size,\n",
- " random_seed=random_seed,\n",
- " shuffle_train=False,\n",
- " distributed_config=None,\n",
- " )\n",
- "\n",
- " def predict(self, dataset, test_size=None, step_size=1, random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Predict.\n",
- "\n",
- " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
- "\n",
- " **Parameters:**
\n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
- " `test_size`: int=None, test size for temporal cross-validation.
\n",
- " `step_size`: int=1, Step size between each window.
\n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " self._check_exog(dataset)\n",
- " self._restart_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- "\n",
- " self.predict_step_size = step_size\n",
- " self.decompose_forecast = False\n",
- " datamodule = TimeSeriesDataModule(dataset=dataset, \n",
- " valid_batch_size=self.n_series, \n",
- " batch_size=self.n_series,\n",
- " **data_module_kwargs)\n",
- "\n",
- " # Protect when case of multiple gpu. PL does not support return preds with multiple gpu.\n",
- " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
- " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
- " pred_trainer_kwargs['devices'] = [0]\n",
- "\n",
- " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
- " fcsts = trainer.predict(self, datamodule=datamodule)\n",
- " fcsts = torch.vstack(fcsts).numpy()\n",
- "\n",
- " fcsts = np.transpose(fcsts, (2,0,1))\n",
- " fcsts = fcsts.flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " return fcsts\n",
- "\n",
- " def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):\n",
- " raise NotImplementedError('decompose')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_fail"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# test unsupported losses\n",
- "test_fail(\n",
- " lambda: BaseMultivariate(\n",
- " h=1,\n",
- " input_size=1,\n",
- " loss=losses.MQLoss(),\n",
- " valid_loss=losses.RMSE(),\n",
- " learning_rate=1,\n",
- " max_steps=1,\n",
- " val_check_steps=1,\n",
- " n_series=1,\n",
- " batch_size=1,\n",
- " ),\n",
- " contains='MQLoss() is not supported'\n",
- ")\n",
- "\n",
- "test_fail(\n",
- " lambda: BaseMultivariate(\n",
- " h=1,\n",
- " input_size=1,\n",
- " loss=losses.RMSE(),\n",
- " valid_loss=losses.MASE(seasonality=1),\n",
- " learning_rate=1,\n",
- " max_steps=1,\n",
- " val_check_steps=1,\n",
- " n_series=1,\n",
- " batch_size=1,\n",
- " ),\n",
- " contains='MASE() is not supported'\n",
- ")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/nbs/common.base_recurrent.ipynb b/nbs/common.base_recurrent.ipynb
deleted file mode 100644
index 7b0ed5585..000000000
--- a/nbs/common.base_recurrent.ipynb
+++ /dev/null
@@ -1,663 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| default_exp common._base_recurrent"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# BaseRecurrent"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "> The `BaseRecurrent` class contains standard methods shared across recurrent neural networks; these models possess the ability to process variable-length sequences of inputs through their internal memory states. The class is represented by `LSTM`, `GRU`, and `RNN`, along with other more sophisticated architectures like `MQCNN`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The standard methods include `TemporalNorm` preprocessing, optimization utilities like parameter initialization, `training_step`, `validation_step`, and shared `fit` and `predict` methods.These shared methods enable all the `neuralforecast.models` compatibility with the `core.NeuralForecast` wrapper class."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "import numpy as np\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import pytorch_lightning as pl\n",
- "import neuralforecast.losses.pytorch as losses\n",
- "\n",
- "from neuralforecast.common._base_model import BaseModel\n",
- "from neuralforecast.common._scalers import TemporalNorm\n",
- "from neuralforecast.tsdataset import TimeSeriesDataModule\n",
- "from neuralforecast.utils import get_indexer_raise_missing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class BaseRecurrent(BaseModel):\n",
- " \"\"\" Base Recurrent\n",
- " \n",
- " Base class for all recurrent-based models. The forecasts are produced sequentially between \n",
- " windows.\n",
- " \n",
- " This class implements the basic functionality for all windows-based models, including:\n",
- " - PyTorch Lightning's methods training_step, validation_step, predict_step.
\n",
- " - fit and predict methods used by NeuralForecast.core class.
\n",
- " - sampling and wrangling methods to sequential windows.
\n",
- " \"\"\"\n",
- " def __init__(self,\n",
- " h,\n",
- " input_size,\n",
- " inference_input_size,\n",
- " loss,\n",
- " valid_loss,\n",
- " learning_rate,\n",
- " max_steps,\n",
- " val_check_steps,\n",
- " batch_size,\n",
- " valid_batch_size,\n",
- " scaler_type='robust',\n",
- " num_lr_decays=0,\n",
- " early_stop_patience_steps=-1,\n",
- " futr_exog_list=None,\n",
- " hist_exog_list=None,\n",
- " stat_exog_list=None,\n",
- " num_workers_loader=0,\n",
- " drop_last_loader=False,\n",
- " random_seed=1, \n",
- " alias=None,\n",
- " optimizer=None,\n",
- " optimizer_kwargs=None,\n",
- " lr_scheduler=None,\n",
- " lr_scheduler_kwargs=None,\n",
- " dataloader_kwargs=None,\n",
- " **trainer_kwargs):\n",
- " super().__init__(\n",
- " random_seed=random_seed,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " optimizer=optimizer,\n",
- " optimizer_kwargs=optimizer_kwargs,\n",
- " lr_scheduler=lr_scheduler,\n",
- " lr_scheduler_kwargs=lr_scheduler_kwargs,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
- " max_steps=max_steps,\n",
- " early_stop_patience_steps=early_stop_patience_steps, \n",
- " **trainer_kwargs,\n",
- " )\n",
- "\n",
- " # Padder to complete train windows, \n",
- " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
- " self.h = h\n",
- " self.input_size = input_size\n",
- " self.inference_input_size = inference_input_size\n",
- " self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- "\n",
- " unsupported_distributions = ['Bernoulli', 'ISQF']\n",
- " if isinstance(self.loss, losses.DistributionLoss) and\\\n",
- " self.loss.distribution in unsupported_distributions:\n",
- " raise Exception(f'Distribution {self.loss.distribution} not available for Recurrent-based models. Please choose another distribution.')\n",
- "\n",
- " # Valid batch_size\n",
- " self.batch_size = batch_size\n",
- " if valid_batch_size is None:\n",
- " self.valid_batch_size = batch_size\n",
- " else:\n",
- " self.valid_batch_size = valid_batch_size\n",
- "\n",
- " # Optimization\n",
- " self.learning_rate = learning_rate\n",
- " self.max_steps = max_steps\n",
- " self.num_lr_decays = num_lr_decays\n",
- " self.lr_decay_steps = max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
- " self.early_stop_patience_steps = early_stop_patience_steps\n",
- " self.val_check_steps = val_check_steps\n",
- "\n",
- " # Scaler\n",
- " self.scaler = TemporalNorm(\n",
- " scaler_type=scaler_type,\n",
- " dim=-1, # Time dimension is -1.\n",
- " num_features=1+len(self.hist_exog_list)+len(self.futr_exog_list)\n",
- " )\n",
- "\n",
- " # Fit arguments\n",
- " self.val_size = 0\n",
- " self.test_size = 0\n",
- "\n",
- " # DataModule arguments\n",
- " self.num_workers_loader = num_workers_loader\n",
- " self.dataloader_kwargs = dataloader_kwargs\n",
- " self.drop_last_loader = drop_last_loader\n",
- " # used by on_validation_epoch_end hook\n",
- " self.validation_step_outputs = []\n",
- " self.alias = alias\n",
- "\n",
- " def _normalization(self, batch, val_size=0, test_size=0):\n",
- " temporal = batch['temporal'] # B, C, T\n",
- " temporal_cols = batch['temporal_cols'].copy()\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Separate data and mask\n",
- " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
- " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
- " temporal_data = temporal[:, temporal_idxs, :]\n",
- " temporal_mask = temporal[:, temporal_cols.get_loc('available_mask'), :].clone()\n",
- "\n",
- " # Remove validation and test set to prevent leakeage\n",
- " if val_size + test_size > 0:\n",
- " cutoff = val_size + test_size\n",
- " temporal_mask[:, -cutoff:] = 0\n",
- "\n",
- " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
- " temporal_mask = temporal_mask.unsqueeze(1) # Add channel dimension for scaler.transform.\n",
- " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
- "\n",
- " # Replace values in windows dict\n",
- " temporal[:, temporal_idxs, :] = temporal_data\n",
- " batch['temporal'] = temporal\n",
- "\n",
- " return batch\n",
- "\n",
- " def _inv_normalization(self, y_hat, temporal_cols, y_idx):\n",
- " # Receives window predictions [B, seq_len, H, output]\n",
- " # Broadcasts outputs and inverts normalization\n",
- "\n",
- " # Get 'y' scale and shift, and add W dimension\n",
- " y_loc = self.scaler.x_shift[:, [y_idx], 0].flatten() #[B,C,T] -> [B] \n",
- " y_scale = self.scaler.x_scale[:, [y_idx], 0].flatten() #[B,C,T] -> [B]\n",
- "\n",
- " # Expand scale and shift to y_hat dimensions\n",
- " y_loc = y_loc.view(*y_loc.shape, *(1,)*(y_hat.ndim-1))#.expand(y_hat) \n",
- " y_scale = y_scale.view(*y_scale.shape, *(1,)*(y_hat.ndim-1))#.expand(y_hat)\n",
- "\n",
- " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
- "\n",
- " return y_hat, y_loc, y_scale\n",
- "\n",
- " def _create_windows(self, batch, step):\n",
- " temporal = batch['temporal']\n",
- " temporal_cols = batch['temporal_cols']\n",
- "\n",
- " if step == 'train':\n",
- " if self.val_size + self.test_size > 0:\n",
- " cutoff = -self.val_size - self.test_size\n",
- " temporal = temporal[:, :, :cutoff]\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " # Truncate batch to shorter time-series \n",
- " av_condition = torch.nonzero(torch.min(temporal[:, temporal_cols.get_loc('available_mask')], axis=0).values)\n",
- " min_time_stamp = int(av_condition.min())\n",
- " \n",
- " available_ts = temporal.shape[-1] - min_time_stamp\n",
- " if available_ts < 1 + self.h:\n",
- " raise Exception(\n",
- " 'Time series too short for given input and output size. \\n'\n",
- " f'Available timestamps: {available_ts}'\n",
- " )\n",
- "\n",
- " temporal = temporal[:, :, min_time_stamp:]\n",
- "\n",
- " if step == 'val':\n",
- " if self.test_size > 0:\n",
- " temporal = temporal[:, :, :-self.test_size]\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " if step == 'predict':\n",
- " if (self.test_size == 0) and (len(self.futr_exog_list)==0):\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " # Test size covers all data, pad left one timestep with zeros\n",
- " if temporal.shape[-1] == self.test_size:\n",
- " padder_left = nn.ConstantPad1d(padding=(1, 0), value=0.0)\n",
- " temporal = padder_left(temporal)\n",
- "\n",
- " # Parse batch\n",
- " window_size = 1 + self.h # 1 for current t and h for future\n",
- " windows = temporal.unfold(dimension=-1,\n",
- " size=window_size,\n",
- " step=1)\n",
- "\n",
- " # Truncated backprogatation/inference (shorten sequence where RNNs unroll)\n",
- " n_windows = windows.shape[2]\n",
- " input_size = -1\n",
- " if (step == 'train') and (self.input_size>0):\n",
- " input_size = self.input_size\n",
- " if (input_size > 0) and (n_windows > input_size):\n",
- " max_sampleable_time = n_windows-self.input_size+1\n",
- " start = np.random.choice(max_sampleable_time)\n",
- " windows = windows[:, :, start:(start+input_size), :]\n",
- "\n",
- " if (step == 'val') and (self.inference_input_size>0):\n",
- " cutoff = self.inference_input_size + self.val_size\n",
- " windows = windows[:, :, -cutoff:, :]\n",
- "\n",
- " if (step == 'predict') and (self.inference_input_size>0):\n",
- " cutoff = self.inference_input_size + self.test_size\n",
- " windows = windows[:, :, -cutoff:, :]\n",
- " \n",
- " # [B, C, input_size, 1+H]\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=batch.get('static', None),\n",
- " static_cols=batch.get('static_cols', None))\n",
- "\n",
- " return windows_batch\n",
- "\n",
- " def _parse_windows(self, batch, windows):\n",
- " # [B, C, seq_len, 1+H]\n",
- " # Filter insample lags from outsample horizon\n",
- " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
- " y_idx = batch['y_idx'] \n",
- " insample_y = windows['temporal'][:, y_idx, :, :-self.h]\n",
- " insample_mask = windows['temporal'][:, mask_idx, :, :-self.h]\n",
- " outsample_y = windows['temporal'][:, y_idx, :, -self.h:].contiguous()\n",
- " outsample_mask = windows['temporal'][:, mask_idx, :, -self.h:].contiguous()\n",
- "\n",
- " # Filter historic exogenous variables\n",
- " if len(self.hist_exog_list):\n",
- " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
- " hist_exog = windows['temporal'][:, hist_exog_idx, :, :-self.h]\n",
- " else:\n",
- " hist_exog = None\n",
- " \n",
- " # Filter future exogenous variables\n",
- " if len(self.futr_exog_list):\n",
- " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
- " futr_exog = windows['temporal'][:, futr_exog_idx, :, :]\n",
- " else:\n",
- " futr_exog = None\n",
- " # Filter static variables\n",
- " if len(self.stat_exog_list):\n",
- " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
- " stat_exog = windows['static'][:, static_idx]\n",
- " else:\n",
- " stat_exog = None\n",
- "\n",
- " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog\n",
- "\n",
- " def training_step(self, batch, batch_idx):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " batch = self._normalization(batch, val_size=self.val_size, test_size=self.test_size)\n",
- " windows = self._create_windows(batch, step='train')\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [B, seq_len, 1]\n",
- " insample_mask=insample_mask, # [B, seq_len, 1]\n",
- " futr_exog=futr_exog, # [B, F, seq_len, 1+H]\n",
- " hist_exog=hist_exog, # [B, C, seq_len]\n",
- " stat_exog=stat_exog) # [B, S]\n",
- "\n",
- " # Model predictions\n",
- " output = self(windows_batch) # tuple([B, seq_len, H, output])\n",
- " if self.loss.is_distribution_output:\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=batch['y_idx'])\n",
- " B = output[0].size()[0]\n",
- " T = output[0].size()[1]\n",
- " H = output[0].size()[2]\n",
- " output = [arg.view(-1, *(arg.size()[2:])) for arg in output]\n",
- " outsample_y = outsample_y.view(B*T,H)\n",
- " outsample_mask = outsample_mask.view(B*T,H)\n",
- " y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(loss):\n",
- " print('Model Parameters', self.hparams)\n",
- " print('insample_y', torch.isnan(insample_y).sum())\n",
- " print('outsample_y', torch.isnan(outsample_y).sum())\n",
- " print('output', torch.isnan(output).sum())\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'train_loss',\n",
- " loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
- " return loss\n",
- "\n",
- " def validation_step(self, batch, batch_idx):\n",
- " if self.val_size == 0:\n",
- " return np.nan\n",
- "\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " batch = self._normalization(batch, val_size=self.val_size, test_size=self.test_size)\n",
- " windows = self._create_windows(batch, step='val')\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [B, seq_len, 1]\n",
- " insample_mask=insample_mask, # [B, seq_len, 1]\n",
- " futr_exog=futr_exog, # [B, F, seq_len, 1+H]\n",
- " hist_exog=hist_exog, # [B, C, seq_len]\n",
- " stat_exog=stat_exog) # [B, S]\n",
- "\n",
- " # Remove train y_hat (+1 and -1 for padded last window with zeros)\n",
- " # tuple([B, seq_len, H, output]) -> tuple([B, validation_size, H, output])\n",
- " val_windows = (self.val_size) + 1\n",
- " outsample_y = outsample_y[:, -val_windows:-1, :]\n",
- " outsample_mask = outsample_mask[:, -val_windows:-1, :] \n",
- "\n",
- " # Model predictions\n",
- " output = self(windows_batch) # tuple([B, seq_len, H, output])\n",
- " if self.loss.is_distribution_output:\n",
- " output = [arg[:, -val_windows:-1] for arg in output]\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " B = output[0].size()[0]\n",
- " T = output[0].size()[1]\n",
- " H = output[0].size()[2]\n",
- " output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]\n",
- " outsample_y = outsample_y.reshape(B*T,H)\n",
- " outsample_mask = outsample_mask.reshape(B*T,H)\n",
- " y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " if str(type(self.valid_loss)) in\\\n",
- " [\"\", \"\"]:\n",
- " output = quants\n",
- " elif str(type(self.valid_loss)) in [\"\"]:\n",
- " output = torch.unsqueeze(sample_mean, dim=-1) # [N,H,1] -> [N,H]\n",
- " \n",
- " else:\n",
- " output = output[:, -val_windows:-1, :]\n",
- "\n",
- " # Validation Loss evaluation\n",
- " if self.valid_loss.is_distribution_output:\n",
- " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " outsample_y, _, _ = self._inv_normalization(y_hat=outsample_y, temporal_cols=batch['temporal_cols'], y_idx=y_idx)\n",
- " output, _, _ = self._inv_normalization(y_hat=output, temporal_cols=batch['temporal_cols'], y_idx=y_idx)\n",
- " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(valid_loss):\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'valid_loss',\n",
- " valid_loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.validation_step_outputs.append(valid_loss)\n",
- " return valid_loss\n",
- "\n",
- " def predict_step(self, batch, batch_idx):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " batch = self._normalization(batch, val_size=0, test_size=self.test_size)\n",
- " windows = self._create_windows(batch, step='predict')\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [B, seq_len, 1]\n",
- " insample_mask=insample_mask, # [B, seq_len, 1]\n",
- " futr_exog=futr_exog, # [B, F, seq_len, 1+H]\n",
- " hist_exog=hist_exog, # [B, C, seq_len]\n",
- " stat_exog=stat_exog) # [B, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch) # tuple([B, seq_len, H], ...)\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=output[0],\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " B = output[0].size()[0]\n",
- " T = output[0].size()[1]\n",
- " H = output[0].size()[2]\n",
- " output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]\n",
- " y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- " y_hat = torch.concat((sample_mean, quants), axis=2)\n",
- " y_hat = y_hat.view(B, T, H, -1)\n",
- "\n",
- " if self.loss.return_params:\n",
- " distr_args = torch.stack(distr_args, dim=-1)\n",
- " distr_args = torch.reshape(distr_args, (B, T, H, -1))\n",
- " y_hat = torch.concat((y_hat, distr_args), axis=3)\n",
- " else:\n",
- " y_hat, _, _ = self._inv_normalization(y_hat=output,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " return y_hat\n",
- "\n",
- " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
- " \"\"\" Fit.\n",
- "\n",
- " The `fit` method, optimizes the neural network's weights using the\n",
- " initialization parameters (`learning_rate`, `batch_size`, ...)\n",
- " and the `loss` function as defined during the initialization. \n",
- " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
- " inherits the initialization's `self.trainer_kwargs`, to customize\n",
- " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
- "\n",
- " The method is designed to be compatible with SKLearn-like classes\n",
- " and in particular to be compatible with the StatsForecast library.\n",
- "\n",
- " By default the `model` is not saving training checkpoints to protect \n",
- " disk memory, to get them change `enable_checkpointing=True` in `__init__`. \n",
- "\n",
- " **Parameters:**
\n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
- " `val_size`: int, validation size for temporal cross-validation.
\n",
- " `test_size`: int, test size for temporal cross-validation.
\n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n",
- " \"\"\"\n",
- " return self._fit(\n",
- " dataset=dataset,\n",
- " batch_size=self.batch_size,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " val_size=val_size,\n",
- " test_size=test_size,\n",
- " random_seed=random_seed,\n",
- " distributed_config=distributed_config,\n",
- " )\n",
- "\n",
- " def predict(self, dataset, step_size=1,\n",
- " random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Predict.\n",
- "\n",
- " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
- "\n",
- " **Parameters:**
\n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
- " `step_size`: int=1, Step size between each window.
\n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " self._check_exog(dataset)\n",
- " self._restart_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- " \n",
- " if step_size > 1:\n",
- " raise Exception('Recurrent models do not support step_size > 1')\n",
- "\n",
- " # fcsts (window, batch, h)\n",
- " # Protect when case of multiple gpu. PL does not support return preds with multiple gpu.\n",
- " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
- " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
- " pred_trainer_kwargs['devices'] = [0]\n",
- "\n",
- " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
- "\n",
- " datamodule = TimeSeriesDataModule(\n",
- " dataset=dataset,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " num_workers=self.num_workers_loader,\n",
- " **data_module_kwargs\n",
- " )\n",
- " fcsts = trainer.predict(self, datamodule=datamodule)\n",
- " if self.test_size > 0:\n",
- " # Remove warmup windows (from train and validation)\n",
- " # [N,T,H,output], avoid indexing last dim for univariate output compatibility\n",
- " fcsts = torch.vstack([fcst[:, -(1+self.test_size-self.h):,:] for fcst in fcsts])\n",
- " fcsts = fcsts.numpy().flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " else:\n",
- " fcsts = torch.vstack([fcst[:,-1:,:] for fcst in fcsts]).numpy().flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " return fcsts"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseRecurrent, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseRecurrent.fit, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseRecurrent.predict, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.utils import AirPassengersDF\n",
- "from neuralforecast.tsdataset import TimeSeriesDataset, TimeSeriesDataModule"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# add h=0,1 unit test for _parse_windows \n",
- "# Declare batch\n",
- "AirPassengersDF['x'] = np.array(len(AirPassengersDF))\n",
- "AirPassengersDF['x2'] = np.array(len(AirPassengersDF)) * 2\n",
- "dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=AirPassengersDF)\n",
- "data = TimeSeriesDataModule(dataset=dataset, batch_size=1, drop_last=True)\n",
- "\n",
- "train_loader = data.train_dataloader()\n",
- "batch = next(iter(train_loader))\n",
- "\n",
- "# Test that hist_exog_list and futr_exog_list correctly filter data that is sent to scaler.\n",
- "baserecurrent = BaseRecurrent(h=12,\n",
- " input_size=117,\n",
- " hist_exog_list=['x', 'x2'],\n",
- " futr_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=10,\n",
- " inference_input_size=2,\n",
- " start_padding_enabled=True)\n",
- "\n",
- "windows = baserecurrent._create_windows(batch, step='train')\n",
- "\n",
- "temporal_cols = windows['temporal_cols'].copy() # B, L+H, C\n",
- "temporal_data_cols = baserecurrent._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- "\n",
- "test_eq(set(temporal_data_cols), set(['x', 'x2']))\n",
- "test_eq(windows['temporal'].shape, torch.Size([1,len(['y', 'x', 'x2', 'available_mask']),117,12+1]))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/nbs/common.base_windows.ipynb b/nbs/common.base_windows.ipynb
deleted file mode 100644
index 80f12e5f5..000000000
--- a/nbs/common.base_windows.ipynb
+++ /dev/null
@@ -1,897 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "524620c1",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| default_exp common._base_windows"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "15392f6f",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1e0f9607-d12d-44e5-b2be-91a57a0bca79",
- "metadata": {},
- "source": [
- "# BaseWindows\n",
- "\n",
- "> The `BaseWindows` class contains standard methods shared across window-based neural networks; in contrast to recurrent neural networks these models commit to a fixed sequence length input. The class is represented by `MLP`, and other more sophisticated architectures like `NBEATS`, and `NHITS`."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1730a556-1574-40ad-92a2-23b924ceb398",
- "metadata": {},
- "source": [
- "The standard methods include data preprocessing `_normalization`, optimization utilities like parameter initialization, `training_step`, `validation_step`, and shared `fit` and `predict` methods.These shared methods enable all the `neuralforecast.models` compatibility with the `core.NeuralForecast` wrapper class. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2508f7a9-1433-4ad8-8f2f-0078c6ed6c3c",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "44065066-e72a-431f-938f-1528adef9fe8",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "import numpy as np\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import pytorch_lightning as pl\n",
- "\n",
- "from neuralforecast.common._base_model import BaseModel\n",
- "from neuralforecast.common._scalers import TemporalNorm\n",
- "from neuralforecast.tsdataset import TimeSeriesDataModule\n",
- "from neuralforecast.utils import get_indexer_raise_missing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ce70cd14-ecb1-4205-8511-fecbd26c8408",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class BaseWindows(BaseModel):\n",
- " \"\"\" Base Windows\n",
- " \n",
- " Base class for all windows-based models. The forecasts are produced separately \n",
- " for each window, which are randomly sampled during training.\n",
- " \n",
- " This class implements the basic functionality for all windows-based models, including:\n",
- " - PyTorch Lightning's methods training_step, validation_step, predict_step.
\n",
- " - fit and predict methods used by NeuralForecast.core class.
\n",
- " - sampling and wrangling methods to generate windows.\n",
- " \"\"\"\n",
- " def __init__(self,\n",
- " h,\n",
- " input_size,\n",
- " loss,\n",
- " valid_loss,\n",
- " learning_rate,\n",
- " max_steps,\n",
- " val_check_steps,\n",
- " batch_size,\n",
- " valid_batch_size,\n",
- " windows_batch_size,\n",
- " inference_windows_batch_size,\n",
- " start_padding_enabled,\n",
- " step_size=1,\n",
- " num_lr_decays=0,\n",
- " early_stop_patience_steps=-1,\n",
- " scaler_type='identity',\n",
- " futr_exog_list=None,\n",
- " hist_exog_list=None,\n",
- " stat_exog_list=None,\n",
- " exclude_insample_y=False,\n",
- " num_workers_loader=0,\n",
- " drop_last_loader=False,\n",
- " random_seed=1,\n",
- " alias=None,\n",
- " optimizer=None,\n",
- " optimizer_kwargs=None,\n",
- " lr_scheduler=None,\n",
- " lr_scheduler_kwargs=None,\n",
- " dataloader_kwargs=None,\n",
- " **trainer_kwargs):\n",
- " super().__init__(\n",
- " random_seed=random_seed,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " optimizer=optimizer,\n",
- " optimizer_kwargs=optimizer_kwargs,\n",
- " lr_scheduler=lr_scheduler,\n",
- " lr_scheduler_kwargs=lr_scheduler_kwargs,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
- " max_steps=max_steps,\n",
- " early_stop_patience_steps=early_stop_patience_steps, \n",
- " **trainer_kwargs,\n",
- " )\n",
- "\n",
- " # Padder to complete train windows, \n",
- " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
- " self.h = h\n",
- " self.input_size = input_size\n",
- " self.windows_batch_size = windows_batch_size\n",
- " self.start_padding_enabled = start_padding_enabled\n",
- " if start_padding_enabled:\n",
- " self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0.0)\n",
- " else:\n",
- " self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- "\n",
- " # Batch sizes\n",
- " self.batch_size = batch_size\n",
- " if valid_batch_size is None:\n",
- " self.valid_batch_size = batch_size\n",
- " else:\n",
- " self.valid_batch_size = valid_batch_size\n",
- " if inference_windows_batch_size is None:\n",
- " self.inference_windows_batch_size = windows_batch_size\n",
- " else:\n",
- " self.inference_windows_batch_size = inference_windows_batch_size\n",
- "\n",
- " # Optimization \n",
- " self.learning_rate = learning_rate\n",
- " self.max_steps = max_steps\n",
- " self.num_lr_decays = num_lr_decays\n",
- " self.lr_decay_steps = (\n",
- " max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
- " )\n",
- " self.early_stop_patience_steps = early_stop_patience_steps\n",
- " self.val_check_steps = val_check_steps\n",
- " self.windows_batch_size = windows_batch_size\n",
- " self.step_size = step_size\n",
- " \n",
- " self.exclude_insample_y = exclude_insample_y\n",
- "\n",
- " # Scaler\n",
- " self.scaler = TemporalNorm(\n",
- " scaler_type=scaler_type,\n",
- " dim=1, # Time dimension is 1.\n",
- " num_features=1+len(self.hist_exog_list)+len(self.futr_exog_list)\n",
- " )\n",
- "\n",
- " # Fit arguments\n",
- " self.val_size = 0\n",
- " self.test_size = 0\n",
- "\n",
- " # Model state\n",
- " self.decompose_forecast = False\n",
- "\n",
- " # DataModule arguments\n",
- " self.num_workers_loader = num_workers_loader\n",
- " self.dataloader_kwargs = dataloader_kwargs\n",
- " self.drop_last_loader = drop_last_loader\n",
- " # used by on_validation_epoch_end hook\n",
- " self.validation_step_outputs = []\n",
- " self.alias = alias\n",
- "\n",
- " def _create_windows(self, batch, step, w_idxs=None):\n",
- " # Parse common data\n",
- " window_size = self.input_size + self.h\n",
- " temporal_cols = batch['temporal_cols']\n",
- " temporal = batch['temporal']\n",
- "\n",
- " if step == 'train':\n",
- " if self.val_size + self.test_size > 0:\n",
- " cutoff = -self.val_size - self.test_size\n",
- " temporal = temporal[:, :, :cutoff]\n",
- "\n",
- " temporal = self.padder_train(temporal)\n",
- " if temporal.shape[-1] < window_size:\n",
- " raise Exception('Time series is too short for training, consider setting a smaller input size or set start_padding_enabled=True')\n",
- " windows = temporal.unfold(dimension=-1, \n",
- " size=window_size, \n",
- " step=self.step_size)\n",
- "\n",
- " # [B, C, Ws, L+H] 0, 1, 2, 3\n",
- " # -> [B * Ws, L+H, C] 0, 2, 3, 1\n",
- " windows_per_serie = windows.shape[2]\n",
- " windows = windows.permute(0, 2, 3, 1).contiguous()\n",
- " windows = windows.reshape(-1, window_size, len(temporal_cols))\n",
- "\n",
- " # Sample and Available conditions\n",
- " available_idx = temporal_cols.get_loc('available_mask')\n",
- " available_condition = windows[:, :self.input_size, available_idx]\n",
- " available_condition = torch.sum(available_condition, axis=1)\n",
- " final_condition = (available_condition > 0)\n",
- " if self.h > 0:\n",
- " sample_condition = windows[:, self.input_size:, available_idx]\n",
- " sample_condition = torch.sum(sample_condition, axis=1)\n",
- " final_condition = (sample_condition > 0) & (available_condition > 0)\n",
- " windows = windows[final_condition]\n",
- "\n",
- " # Parse Static data to match windows\n",
- " # [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in]\n",
- " static = batch.get('static', None)\n",
- " static_cols=batch.get('static_cols', None)\n",
- " if static is not None:\n",
- " static = torch.repeat_interleave(static, \n",
- " repeats=windows_per_serie, dim=0)\n",
- " static = static[final_condition]\n",
- "\n",
- " # Protection of empty windows\n",
- " if final_condition.sum() == 0:\n",
- " raise Exception('No windows available for training')\n",
- "\n",
- " # Sample windows\n",
- " n_windows = len(windows)\n",
- " if self.windows_batch_size is not None:\n",
- " w_idxs = np.random.choice(n_windows, \n",
- " size=self.windows_batch_size,\n",
- " replace=(n_windows < self.windows_batch_size))\n",
- " windows = windows[w_idxs]\n",
- " \n",
- " if static is not None:\n",
- " static = static[w_idxs]\n",
- "\n",
- " # think about interaction available * sample mask\n",
- " # [B, C, Ws, L+H]\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- " return windows_batch\n",
- "\n",
- " elif step in ['predict', 'val']:\n",
- "\n",
- " if step == 'predict':\n",
- " initial_input = temporal.shape[-1] - self.test_size\n",
- " if initial_input <= self.input_size: # There is not enough data to predict first timestamp\n",
- " padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0.0)\n",
- " temporal = padder_left(temporal)\n",
- " predict_step_size = self.predict_step_size\n",
- " cutoff = - self.input_size - self.test_size\n",
- " temporal = temporal[:, :, cutoff:]\n",
- "\n",
- " elif step == 'val':\n",
- " predict_step_size = self.step_size\n",
- " cutoff = -self.input_size - self.val_size - self.test_size\n",
- " if self.test_size > 0:\n",
- " temporal = batch['temporal'][:, :, cutoff:-self.test_size]\n",
- " else:\n",
- " temporal = batch['temporal'][:, :, cutoff:]\n",
- " if temporal.shape[-1] < window_size:\n",
- " initial_input = temporal.shape[-1] - self.val_size\n",
- " padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0.0)\n",
- " temporal = padder_left(temporal)\n",
- "\n",
- " if (step=='predict') and (self.test_size==0) and (len(self.futr_exog_list)==0):\n",
- " padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- " temporal = padder_right(temporal)\n",
- "\n",
- " windows = temporal.unfold(dimension=-1,\n",
- " size=window_size,\n",
- " step=predict_step_size)\n",
- "\n",
- " # [batch, channels, windows, window_size] 0, 1, 2, 3\n",
- " # -> [batch * windows, window_size, channels] 0, 2, 3, 1\n",
- " windows_per_serie = windows.shape[2]\n",
- " windows = windows.permute(0, 2, 3, 1).contiguous()\n",
- " windows = windows.reshape(-1, window_size, len(temporal_cols))\n",
- "\n",
- " static = batch.get('static', None)\n",
- " static_cols=batch.get('static_cols', None)\n",
- " if static is not None:\n",
- " static = torch.repeat_interleave(static, \n",
- " repeats=windows_per_serie, dim=0)\n",
- " \n",
- " # Sample windows for batched prediction\n",
- " if w_idxs is not None:\n",
- " windows = windows[w_idxs]\n",
- " if static is not None:\n",
- " static = static[w_idxs]\n",
- " \n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- " return windows_batch\n",
- " else:\n",
- " raise ValueError(f'Unknown step {step}')\n",
- "\n",
- " def _normalization(self, windows, y_idx):\n",
- " # windows are already filtered by train/validation/test\n",
- " # from the `create_windows_method` nor leakage risk\n",
- " temporal = windows['temporal'] # B, L+H, C\n",
- " temporal_cols = windows['temporal_cols'].copy() # B, L+H, C\n",
- "\n",
- " # To avoid leakage uses only the lags\n",
- " #temporal_data_cols = temporal_cols.drop('available_mask').tolist()\n",
- " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
- " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
- " temporal_data = temporal[:, :, temporal_idxs]\n",
- " temporal_mask = temporal[:, :, temporal_cols.get_loc('available_mask')].clone()\n",
- " if self.h > 0:\n",
- " temporal_mask[:, -self.h:] = 0.0\n",
- "\n",
- " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
- " temporal_mask = temporal_mask.unsqueeze(-1) # Add channel dimension for scaler.transform.\n",
- " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
- "\n",
- " # Replace values in windows dict\n",
- " temporal[:, :, temporal_idxs] = temporal_data\n",
- " windows['temporal'] = temporal\n",
- "\n",
- " return windows\n",
- "\n",
- " def _inv_normalization(self, y_hat, temporal_cols, y_idx):\n",
- " # Receives window predictions [B, H, output]\n",
- " # Broadcasts outputs and inverts normalization\n",
- "\n",
- " # Add C dimension\n",
- " if y_hat.ndim == 2:\n",
- " remove_dimension = True\n",
- " y_hat = y_hat.unsqueeze(-1)\n",
- " else:\n",
- " remove_dimension = False\n",
- "\n",
- " y_scale = self.scaler.x_scale[:, :, [y_idx]]\n",
- " y_loc = self.scaler.x_shift[:, :, [y_idx]]\n",
- "\n",
- " y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1).to(y_hat.device)\n",
- " y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1).to(y_hat.device)\n",
- "\n",
- " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
- " y_loc = y_loc.to(y_hat.device)\n",
- " y_scale = y_scale.to(y_hat.device)\n",
- " \n",
- " if remove_dimension:\n",
- " y_hat = y_hat.squeeze(-1)\n",
- " y_loc = y_loc.squeeze(-1)\n",
- " y_scale = y_scale.squeeze(-1)\n",
- "\n",
- " return y_hat, y_loc, y_scale\n",
- "\n",
- " def _parse_windows(self, batch, windows):\n",
- " # Filter insample lags from outsample horizon\n",
- " y_idx = batch['y_idx']\n",
- " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
- "\n",
- " insample_y = windows['temporal'][:, :self.input_size, y_idx]\n",
- " insample_mask = windows['temporal'][:, :self.input_size, mask_idx]\n",
- "\n",
- " # Declare additional information\n",
- " outsample_y = None\n",
- " outsample_mask = None\n",
- " hist_exog = None\n",
- " futr_exog = None\n",
- " stat_exog = None\n",
- "\n",
- " if self.h > 0:\n",
- " outsample_y = windows['temporal'][:, self.input_size:, y_idx]\n",
- " outsample_mask = windows['temporal'][:, self.input_size:, mask_idx]\n",
- "\n",
- " if len(self.hist_exog_list):\n",
- " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
- " hist_exog = windows['temporal'][:, :self.input_size, hist_exog_idx]\n",
- "\n",
- " if len(self.futr_exog_list):\n",
- " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
- " futr_exog = windows['temporal'][:, :, futr_exog_idx]\n",
- "\n",
- " if len(self.stat_exog_list):\n",
- " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
- " stat_exog = windows['static'][:, static_idx]\n",
- "\n",
- " # TODO: think a better way of removing insample_y features\n",
- " if self.exclude_insample_y:\n",
- " insample_y = insample_y * 0\n",
- "\n",
- " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog\n",
- "\n",
- " def training_step(self, batch, batch_idx):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " windows = self._create_windows(batch, step='train')\n",
- " y_idx = batch['y_idx']\n",
- " original_outsample_y = torch.clone(windows['temporal'][:,-self.h:,y_idx])\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L + h, F]\n",
- " hist_exog=hist_exog, # [Ws, L, X]\n",
- " stat_exog=stat_exog) # [Ws, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " outsample_y = original_outsample_y\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(loss):\n",
- " print('Model Parameters', self.hparams)\n",
- " print('insample_y', torch.isnan(insample_y).sum())\n",
- " print('outsample_y', torch.isnan(outsample_y).sum())\n",
- " print('output', torch.isnan(output).sum())\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'train_loss',\n",
- " loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
- " return loss\n",
- "\n",
- " def _compute_valid_loss(self, outsample_y, output, outsample_mask, temporal_cols, y_idx):\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=temporal_cols,\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " if str(type(self.valid_loss)) in\\\n",
- " [\"\", \"\"]:\n",
- " output = quants\n",
- " elif str(type(self.valid_loss)) in [\"\"]:\n",
- " output = torch.unsqueeze(sample_mean, dim=-1) # [N,H,1] -> [N,H]\n",
- "\n",
- " # Validation Loss evaluation\n",
- " if self.valid_loss.is_distribution_output:\n",
- " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " output, _, _ = self._inv_normalization(y_hat=output,\n",
- " temporal_cols=temporal_cols,\n",
- " y_idx=y_idx)\n",
- " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- " return valid_loss\n",
- " \n",
- " def validation_step(self, batch, batch_idx):\n",
- " if self.val_size == 0:\n",
- " return np.nan\n",
- "\n",
- " # TODO: Hack to compute number of windows\n",
- " windows = self._create_windows(batch, step='val')\n",
- " n_windows = len(windows['temporal'])\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Number of windows in batch\n",
- " windows_batch_size = self.inference_windows_batch_size\n",
- " if windows_batch_size < 0:\n",
- " windows_batch_size = n_windows\n",
- " n_batches = int(np.ceil(n_windows/windows_batch_size))\n",
- "\n",
- " valid_losses = []\n",
- " batch_sizes = []\n",
- " for i in range(n_batches):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " w_idxs = np.arange(i*windows_batch_size, \n",
- " min((i+1)*windows_batch_size, n_windows))\n",
- " windows = self._create_windows(batch, step='val', w_idxs=w_idxs)\n",
- " original_outsample_y = torch.clone(windows['temporal'][:,-self.h:,y_idx])\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L + h, F]\n",
- " hist_exog=hist_exog, # [Ws, L, X]\n",
- " stat_exog=stat_exog) # [Ws, S]\n",
- " \n",
- " # Model Predictions\n",
- " output_batch = self(windows_batch)\n",
- " valid_loss_batch = self._compute_valid_loss(outsample_y=original_outsample_y,\n",
- " output=output_batch, outsample_mask=outsample_mask,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=batch['y_idx'])\n",
- " valid_losses.append(valid_loss_batch)\n",
- " batch_sizes.append(len(output_batch))\n",
- " \n",
- " valid_loss = torch.stack(valid_losses)\n",
- " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n",
- " batch_size = torch.sum(batch_sizes)\n",
- " valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size\n",
- "\n",
- " if torch.isnan(valid_loss):\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'valid_loss',\n",
- " valid_loss.detach().item(),\n",
- " batch_size=batch_size,\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.validation_step_outputs.append(valid_loss)\n",
- " return valid_loss\n",
- "\n",
- " def predict_step(self, batch, batch_idx):\n",
- "\n",
- " # TODO: Hack to compute number of windows\n",
- " windows = self._create_windows(batch, step='predict')\n",
- " n_windows = len(windows['temporal'])\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Number of windows in batch\n",
- " windows_batch_size = self.inference_windows_batch_size\n",
- " if windows_batch_size < 0:\n",
- " windows_batch_size = n_windows\n",
- " n_batches = int(np.ceil(n_windows/windows_batch_size))\n",
- "\n",
- " y_hats = []\n",
- " for i in range(n_batches):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " w_idxs = np.arange(i*windows_batch_size, \n",
- " min((i+1)*windows_batch_size, n_windows))\n",
- " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L + h, F]\n",
- " hist_exog=hist_exog, # [Ws, L, X]\n",
- " stat_exog=stat_exog) # [Ws, S] \n",
- "\n",
- " # Model Predictions\n",
- " output_batch = self(windows_batch)\n",
- " # Inverse normalization and sampling\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=torch.empty(size=(insample_y.shape[0], self.h),\n",
- " dtype=output_batch[0].dtype,\n",
- " device=output_batch[0].device),\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- " y_hat = torch.concat((sample_mean, quants), axis=2)\n",
- "\n",
- " if self.loss.return_params:\n",
- " distr_args = torch.stack(distr_args, dim=-1)\n",
- " distr_args = torch.reshape(distr_args, (len(windows[\"temporal\"]), self.h, -1))\n",
- " y_hat = torch.concat((y_hat, distr_args), axis=2)\n",
- " else:\n",
- " y_hat, _, _ = self._inv_normalization(y_hat=output_batch,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " y_hats.append(y_hat)\n",
- " y_hat = torch.cat(y_hats, dim=0)\n",
- " return y_hat\n",
- " \n",
- " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
- " \"\"\" Fit.\n",
- "\n",
- " The `fit` method, optimizes the neural network's weights using the\n",
- " initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
- " and the `loss` function as defined during the initialization. \n",
- " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
- " inherits the initialization's `self.trainer_kwargs`, to customize\n",
- " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
- "\n",
- " The method is designed to be compatible with SKLearn-like classes\n",
- " and in particular to be compatible with the StatsForecast library.\n",
- "\n",
- " By default the `model` is not saving training checkpoints to protect \n",
- " disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
- "\n",
- " **Parameters:**<br>\n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>\n",
- " `val_size`: int, validation size for temporal cross-validation.<br>\n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>\n",
- " `test_size`: int, test size for temporal cross-validation.<br>\n",
- " \"\"\"\n",
- " return self._fit(\n",
- " dataset=dataset,\n",
- " batch_size=self.batch_size,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " val_size=val_size,\n",
- " test_size=test_size,\n",
- " random_seed=random_seed,\n",
- " distributed_config=distributed_config,\n",
- " )\n",
- "\n",
- " def predict(self, dataset, test_size=None, step_size=1,\n",
- " random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Predict.\n",
- "\n",
- " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
- "\n",
- " **Parameters:**<br>\n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>\n",
- " `test_size`: int=None, test size for temporal cross-validation.<br>\n",
- " `step_size`: int=1, Step size between each window.<br>\n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.<br>\n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " self._check_exog(dataset)\n",
- " self._restart_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- "\n",
- " self.predict_step_size = step_size\n",
- " self.decompose_forecast = False\n",
- " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " **data_module_kwargs)\n",
- "\n",
- " # Protect when case of multiple gpu. PL does not support return preds with multiple gpu.\n",
- " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
- " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
- " pred_trainer_kwargs['devices'] = [0]\n",
- "\n",
- " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
- " fcsts = trainer.predict(self, datamodule=datamodule) \n",
- " fcsts = torch.vstack(fcsts).numpy().flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " return fcsts\n",
- "\n",
- " def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Decompose Predictions.\n",
- "\n",
- " Decompose the predictions through the network's layers.\n",
- " Available methods are `ESRNN`, `NHITS`, `NBEATS`, and `NBEATSx`.\n",
- "\n",
- " **Parameters:**<br>\n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation here](https://nixtla.github.io/neuralforecast/tsdataset.html).<br>\n",
- " `step_size`: int=1, step size between each window of temporal data.<br>\n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " # Restart random seed\n",
- " if random_seed is None:\n",
- " random_seed = self.random_seed\n",
- " torch.manual_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- "\n",
- " self.predict_step_size = step_size\n",
- " self.decompose_forecast = True\n",
- " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " **data_module_kwargs)\n",
- " trainer = pl.Trainer(**self.trainer_kwargs)\n",
- " fcsts = trainer.predict(self, datamodule=datamodule)\n",
- " self.decompose_forecast = False # Default decomposition back to false\n",
- " return torch.vstack(fcsts).numpy()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1712ea15",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "48063f70",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows.fit, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "75529be6",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows.predict, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a1f8315d",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows.decompose, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8927f2e5-f376-4c99-bb8f-8cbb73efe01e",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.utils import AirPassengersDF\n",
- "from neuralforecast.tsdataset import TimeSeriesDataset, TimeSeriesDataModule"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "61490e69-f014-4087-83c5-540d5bd7d458",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# add h=0,1 unit test for _parse_windows \n",
- "# Declare batch\n",
- "AirPassengersDF['x'] = np.array(len(AirPassengersDF))\n",
- "AirPassengersDF['x2'] = np.array(len(AirPassengersDF)) * 2\n",
- "dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=AirPassengersDF)\n",
- "data = TimeSeriesDataModule(dataset=dataset, batch_size=1, drop_last=True)\n",
- "\n",
- "train_loader = data.train_dataloader()\n",
- "batch = next(iter(train_loader))\n",
- "\n",
- "# Instantiate BaseWindows to test _parse_windows method h in [0,1]\n",
- "for h in [0, 1]:\n",
- " basewindows = BaseWindows(h=h,\n",
- " input_size=len(AirPassengersDF)-h,\n",
- " hist_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=1,\n",
- " inference_windows_batch_size=1,\n",
- " start_padding_enabled=False)\n",
- "\n",
- " windows = basewindows._create_windows(batch, step='train')\n",
- " original_outsample_y = torch.clone(windows['temporal'][:,-basewindows.h:,0])\n",
- " windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)\n",
- "\n",
- " # Check equality of parsed and original insample_y\n",
- " parsed_insample_y = insample_y.numpy().flatten()\n",
- " original_insample_y = AirPassengersDF.y.values\n",
- " test_eq(parsed_insample_y, original_insample_y[:basewindows.input_size])\n",
- "\n",
- " # Check equality of parsed and original hist_exog\n",
- " parsed_hist_exog = hist_exog.numpy().flatten()\n",
- " original_hist_exog = AirPassengersDF.x.values\n",
- " test_eq(parsed_hist_exog, original_hist_exog[:basewindows.input_size])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "86ab58a9",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# Test that start_padding_enabled=True solves the problem of short series\n",
- "h = 12\n",
- "basewindows = BaseWindows(h=h,\n",
- " input_size=500,\n",
- " hist_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=10,\n",
- " inference_windows_batch_size=2,\n",
- " start_padding_enabled=True)\n",
- "\n",
- "windows = basewindows._create_windows(batch, step='train')\n",
- "windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)\n",
- "\n",
- "basewindows.val_size = 12\n",
- "windows = basewindows._create_windows(batch, step='val')\n",
- "windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)\n",
- "\n",
- "basewindows.test_size = 12\n",
- "basewindows.predict_step_size = 1\n",
- "windows = basewindows._create_windows(batch, step='predict')\n",
- "windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "54d2e850",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "\n",
- "# Test that hist_exog_list and futr_exog_list correctly filter data.\n",
- "# that is sent to scaler.\n",
- "basewindows = BaseWindows(h=12,\n",
- " input_size=500,\n",
- " hist_exog_list=['x', 'x2'],\n",
- " futr_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=10,\n",
- " inference_windows_batch_size=2,\n",
- " start_padding_enabled=True)\n",
- "\n",
- "windows = basewindows._create_windows(batch, step='train')\n",
- "\n",
- "temporal_cols = windows['temporal_cols'].copy() # B, L+H, C\n",
- "temporal_data_cols = basewindows._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- "\n",
- "test_eq(set(temporal_data_cols), set(['x', 'x2']))\n",
- "test_eq(windows['temporal'].shape, torch.Size([10,500+12,len(['y', 'x', 'x2', 'available_mask'])]))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bf493ff9",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/nbs/common.model_checks.ipynb b/nbs/common.model_checks.ipynb
new file mode 100644
index 000000000..d618c5c33
--- /dev/null
+++ b/nbs/common.model_checks.ipynb
@@ -0,0 +1,248 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp common._model_checks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 1. Checks for models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This file provides a set of unit tests for all models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "import pandas as pd\n",
+ "import neuralforecast.losses.pytorch as losses\n",
+ "\n",
+ "from neuralforecast import NeuralForecast\n",
+ "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, generate_series"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "seed = 0\n",
+ "test_size = 14 # number of FREQ periods subtracted from the last timestamp to get the train/test cutoff\n",
+ "FREQ = \"D\"\n",
+ "\n",
+ "# 1 series, no exogenous\n",
+ "N_SERIES_1 = 1\n",
+ "df = generate_series(n_series=N_SERIES_1, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ) # train/test cutoff timestamp\n",
+ "Y_TRAIN_DF_1 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_1 = df[df.ds >= max_ds] # rows at the cutoff itself go to the test split\n",
+ "\n",
+ "# 5 series, no exogenous\n",
+ "N_SERIES_2 = 5\n",
+ "df = generate_series(n_series=N_SERIES_2, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)\n",
+ "Y_TRAIN_DF_2 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_2 = df[df.ds >= max_ds]\n",
+ "\n",
+ "# 1 series, with static and temporal exogenous\n",
+ "N_SERIES_3 = 1\n",
+ "df, STATIC_3 = generate_series(n_series=N_SERIES_3, n_static_features=2, \n",
+ " n_temporal_features=2, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)\n",
+ "Y_TRAIN_DF_3 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_3 = df[df.ds >= max_ds]\n",
+ "\n",
+ "# 5 series, with static and temporal exogenous\n",
+ "N_SERIES_4 = 5\n",
+ "df, STATIC_4 = generate_series(n_series=N_SERIES_4, n_static_features=2, \n",
+ " n_temporal_features=2, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)\n",
+ "Y_TRAIN_DF_4 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_4 = df[df.ds >= max_ds]\n",
+ "\n",
+ "# Generic test for a given config for a model: builds the model, then fits and predicts on DF_1 and DF_2, and on DF_3/DF_4 only when EXOGENOUS_STAT and EXOGENOUS_FUTR are both set. NOTE: writes keys into the caller's `config` dict in place.\n",
+ "def _run_model_tests(model_class, config):\n",
+ " if model_class.RECURRENT:\n",
+ " config[\"inference_input_size\"] = config[\"input_size\"]\n",
+ "\n",
+ " # DF_1: N_SERIES_1 series, no static_df passed to fit\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_1\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_1[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_1[\"y\"].values \n",
+ "\n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_1, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_1)\n",
+ " # DF_2: N_SERIES_2 series, no static_df passed to fit\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_2\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_2[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_2[\"y\"].values\n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_2, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_2)\n",
+ "\n",
+ " if model.EXOGENOUS_STAT and model.EXOGENOUS_FUTR: # NOTE(review): read from the DF_2 model instance, not from model_class\n",
+ " # DF_3: N_SERIES_3 series, fitted with static_df=STATIC_3\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_3\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_3[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_3[\"y\"].values\n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_3, static_df=STATIC_3, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_3)\n",
+ "\n",
+ " # DF_4: N_SERIES_4 series, fitted with static_df=STATIC_4\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_4\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_4[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_4[\"y\"].values \n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_4, static_df=STATIC_4, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_4) \n",
+ "\n",
+    "# Tests a model against every loss function\n",
+    "def check_loss_functions(model_class):\n",
+    "    \"\"\"Run `_run_model_tests` once per loss: a RuntimeError fails the check, any other exception only skips that loss.\"\"\"\n",
+    "    loss_list = [losses.MAE(), losses.MSE(), losses.RMSE(), losses.MAPE(), losses.SMAPE(), losses.MASE(seasonality=7),\n",
+    "                 losses.QuantileLoss(q=0.5), losses.MQLoss(), losses.IQLoss(), losses.DistributionLoss(\"Normal\"),\n",
+    "                 losses.DistributionLoss(\"StudentT\"), losses.DistributionLoss(\"Poisson\"), losses.DistributionLoss(\"NegativeBinomial\"),\n",
+    "                 losses.DistributionLoss(\"Tweedie\", rho=1.5), losses.DistributionLoss(\"ISQF\"), losses.PMM(), losses.PMM(weighted=True),\n",
+    "                 losses.GMM(), losses.GMM(weighted=True), losses.NBMM(), losses.NBMM(weighted=True), losses.HuberLoss(),\n",
+    "                 losses.TukeyLoss(), losses.HuberQLoss(q=0.5), losses.HuberMQLoss()]\n",
+    "    for loss in loss_list:\n",
+    "        test_name = f\"{model_class.__name__}: checking {loss._get_name()}\"\n",
+    "        print(test_name)\n",
+    "        config = {'max_steps': 2,\n",
+    "                  'h': 7,\n",
+    "                  'input_size': 28,\n",
+    "                  'loss': loss,\n",
+    "                  'valid_loss': None,\n",
+    "                  'enable_progress_bar': False,\n",
+    "                  'enable_model_summary': False,\n",
+    "                  'val_check_steps': 2}\n",
+    "        try:\n",
+    "            _run_model_tests(model_class, config)\n",
+    "        except RuntimeError as err:\n",
+    "            raise Exception(f\"{test_name} failed.\") from err\n",
+    "        except Exception:  # deliberate best-effort: any non-RuntimeError error only skips this loss\n",
+    "            print(f\"{test_name} skipped on raised Exception.\")\n",
+ "\n",
+    "# Tests a model against the AirPassengers dataset: fit + predict on a train/test split, then a 2-window cross-validation\n",
+    "def check_airpassengers(model_class):\n",
+    "    print(f\"{model_class.__name__}: checking forecast AirPassengers dataset\")\n",
+    "    Y_train_df = AirPassengersPanel[AirPassengersPanel.ds < AirPassengersPanel['ds'].values[-12]] # train: rows before the 12th-from-last timestamp\n",
+    "    Y_test_df = AirPassengersPanel[AirPassengersPanel.ds >= AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # test: rows from that timestamp on\n",
+    "\n",
+    "    config = {'max_steps': 2,\n",
+    "              'h': 12,\n",
+    "              'input_size': 24,\n",
+    "              'enable_progress_bar': False,\n",
+    "              'enable_model_summary': False,\n",
+    "              'val_check_steps': 2,\n",
+    "              }\n",
+    "\n",
+    "    if model_class.MULTIVARIATE:\n",
+    "        config[\"n_series\"] = Y_train_df[\"unique_id\"].nunique()\n",
+    "    # Normal forecast\n",
+    "    fcst = NeuralForecast(models=[model_class(**config)], freq='M')\n",
+    "    fcst.fit(df=Y_train_df, static_df=AirPassengersStatic)\n",
+    "    _ = fcst.predict(futr_df=Y_test_df)\n",
+    "\n",
+    "    # Cross-validation\n",
+    "    fcst = NeuralForecast(models=[model_class(**config)], freq='M')\n",
+    "    _ = fcst.cross_validation(df=AirPassengersPanel, static_df=AirPassengersStatic, n_windows=2, step_size=12)\n",
+ "\n",
+    "# Add unit test functions to this function\n",
+    "def check_model(model_class, checks=(\"losses\", \"airpassengers\")):\n",
+    "    \"\"\"\n",
+    "    Check model with various tests. Options for checks are:<br>\n",
+    "    \"losses\": test the model against all loss functions<br>\n",
+    "    \"airpassengers\": test the model against the airpassengers dataset for forecasting and cross-validation<br>\n",
+    "    Raises `Exception` when a check fails with a `RuntimeError`.\n",
+    "    \"\"\"\n",
+    "    if \"losses\" in checks:\n",
+    "        check_loss_functions(model_class)\n",
+    "    if \"airpassengers\" in checks:\n",
+    "        try:\n",
+    "            check_airpassengers(model_class)\n",
+    "        except RuntimeError as err:\n",
+    "            raise Exception(f\"{model_class.__name__}: AirPassengers forecast test failed.\") from err\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "#| hide\n",
+ "# Run the loss-function checks for every model listed below. This is a slow test (the cell is marked `eval: false`)\n",
+ "import warnings\n",
+ "import logging\n",
+ "from neuralforecast.models import RNN, GRU, TCN, LSTM, DeepAR, DilatedRNN, BiTCN, MLP, NBEATS, NBEATSx, NHITS, DLinear, NLinear, TiDE, DeepNPTS, TFT, VanillaTransformer, Informer, Autoformer, FEDformer, TimesNet, iTransformer, KAN, RMoK, StemGNN, TSMixer, TSMixerx, MLPMultivariate, SOFTS, TimeMixer\n",
+ "\n",
+ "models = [RNN, GRU, TCN, LSTM, DeepAR, DilatedRNN, BiTCN, MLP, NBEATS, NBEATSx, NHITS, DLinear, NLinear, TiDE, DeepNPTS, TFT, VanillaTransformer, Informer, Autoformer, FEDformer, TimesNet, iTransformer, KAN, RMoK, StemGNN, TSMixer, TSMixerx, MLPMultivariate, SOFTS, TimeMixer]\n",
+ "\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " for model in models:\n",
+ " check_model(model, checks=[\"losses\"]) # only the 'losses' check is requested here"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/nbs/common.modules.ipynb b/nbs/common.modules.ipynb
index f90e936da..403a2a5d6 100644
--- a/nbs/common.modules.ipynb
+++ b/nbs/common.modules.ipynb
@@ -691,6 +691,66 @@
" x = x + self.mean\n",
" return x"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "class RevINMultivariate(nn.Module):\n",
+ " \"\"\" \n",
+ " Reversible instance normalization for multivariate models: mode='norm' standardizes x along axis 1 and stores the statistics, mode='denorm' undoes it with the stored statistics.\n",
+ " \"\"\" \n",
+ " def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False): # NOTE(review): subtract_last and non_norm are accepted but never read in this class\n",
+ " super().__init__()\n",
+ " self.num_features = num_features\n",
+ " self.eps = eps\n",
+ " self.affine = affine\n",
+ " if self.affine:\n",
+ " self._init_params()\n",
+ "\n",
+ " def forward(self, x, mode: str):\n",
+ " if mode == 'norm':\n",
+ " x = self._normalize(x)\n",
+ " elif mode == 'denorm':\n",
+ " x = self._denormalize(x)\n",
+ " else:\n",
+ " raise NotImplementedError\n",
+ " return x\n",
+ "\n",
+ " def _init_params(self):\n",
+ " # initialize learnable RevIN affine params with shape (1, 1, num_features)\n",
+ " self.affine_weight = nn.Parameter(torch.ones((1, 1, self.num_features)))\n",
+ " self.affine_bias = nn.Parameter(torch.zeros((1, 1, self.num_features)))\n",
+ "\n",
+ " def _normalize(self, x):\n",
+ " # Mean/std along axis 1, stored on self for _denormalize; detached so no gradient flows through them\n",
+ " self.batch_mean = torch.mean(x, axis=1, keepdim=True).detach()\n",
+ " self.batch_std = torch.sqrt(torch.var(x, axis=1, keepdim=True, unbiased=False) + self.eps).detach()\n",
+ " \n",
+ " # Instance normalization\n",
+ " x = x - self.batch_mean\n",
+ " x = x / self.batch_std\n",
+ " \n",
+ " if self.affine:\n",
+ " x = x * self.affine_weight\n",
+ " x = x + self.affine_bias\n",
+ "\n",
+ " return x\n",
+ "\n",
+ " def _denormalize(self, x):\n",
+ " # Reverse the normalization; relies on batch_mean/batch_std, which are only set by a prior _normalize call\n",
+ " if self.affine:\n",
+ " x = x - self.affine_bias\n",
+ " x = x / self.affine_weight # NOTE(review): divides by the learned weight directly, with no eps guard\n",
+ " \n",
+ " x = x * self.batch_std\n",
+ " x = x + self.batch_mean \n",
+ "\n",
+ " return x"
+ ]
}
],
"metadata": {
diff --git a/nbs/common.scalers.ipynb b/nbs/common.scalers.ipynb
index 9e6737c3c..f49714a6b 100644
--- a/nbs/common.scalers.ipynb
+++ b/nbs/common.scalers.ipynb
@@ -682,11 +682,11 @@
" def _init_params(self, num_features):\n",
" # Initialize RevIN scaler params to broadcast:\n",
" if self.dim==1: # [B,T,C] [1,1,C]\n",
- " self.revin_bias = nn.Parameter(torch.zeros(1,1,num_features))\n",
- " self.revin_weight = nn.Parameter(torch.ones(1,1,num_features))\n",
+ " self.revin_bias = nn.Parameter(torch.zeros(1, 1, num_features, 1))\n",
+ " self.revin_weight = nn.Parameter(torch.ones(1, 1, num_features, 1))\n",
" elif self.dim==-1: # [B,C,T] [1,C,1]\n",
- " self.revin_bias = nn.Parameter(torch.zeros(1,num_features,1))\n",
- " self.revin_weight = nn.Parameter(torch.ones(1,num_features,1))\n",
+ " self.revin_bias = nn.Parameter(torch.zeros(1, num_features, 1, 1))\n",
+ " self.revin_weight = nn.Parameter(torch.ones(1, num_features, 1, 1))\n",
"\n",
" #@torch.no_grad()\n",
" def transform(self, x, mask):\n",
@@ -863,8 +863,8 @@
"#| hide\n",
"# Validate scalers\n",
"for scaler_type in [None, 'identity', 'standard', 'robust', 'minmax', 'minmax1', 'invariant', 'revin']:\n",
- " x = 1.0*torch.tensor(np_x)\n",
- " mask = torch.tensor(np_mask)\n",
+ " x = 1.0*torch.tensor(np_x).unsqueeze(-1)\n",
+ " mask = torch.tensor(np_mask).unsqueeze(-1)\n",
" scaler = TemporalNorm(scaler_type=scaler_type, dim=1, num_features=np_x.shape[-1])\n",
" x_scaled = scaler.transform(x=x, mask=mask)\n",
" x_recovered = scaler.inverse_transform(x_scaled)\n",
@@ -987,14 +987,6 @@
"nf = NeuralForecast(models=[model], freq='MS')\n",
"Y_hat_df = nf.cross_validation(df=Y_df, val_size=12, n_windows=1)"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b2f50bd8",
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
diff --git a/nbs/core.ipynb b/nbs/core.ipynb
index 3bb61dbdc..c44411f07 100644
--- a/nbs/core.ipynb
+++ b/nbs/core.ipynb
@@ -84,6 +84,7 @@
"\n",
"from neuralforecast.common._base_model import DistributedConfig\n",
"from neuralforecast.compat import SparkDataFrame\n",
+ "from neuralforecast.losses.pytorch import IQLoss\n",
"from neuralforecast.tsdataset import _FilesDataset, TimeSeriesDataset, LocalFilesTimeSeriesDataset\n",
"from neuralforecast.models import (\n",
" GRU, LSTM, RNN, TCN, DeepAR, DilatedRNN,\n",
@@ -96,7 +97,7 @@
" TimeMixer, KAN, RMoK\n",
")\n",
"from neuralforecast.common._base_auto import BaseAuto, MockTrial\n",
- "from neuralforecast.utils import PredictionIntervals, get_prediction_interval_method"
+ "from neuralforecast.utils import PredictionIntervals, get_prediction_interval_method, level_to_quantiles, quantiles_to_level"
]
},
{
@@ -337,6 +338,7 @@
" # Flags and attributes\n",
" self._fitted = False\n",
" self._reset_models()\n",
+ " self._add_level = False\n",
"\n",
" def _scalers_fit_transform(self, dataset: TimeSeriesDataset) -> None:\n",
" self.scalers_ = {} \n",
@@ -737,13 +739,14 @@
" names: List[str] = []\n",
" count_names = {'model': 0}\n",
" for model in self.models:\n",
- " if add_level and model.loss.outputsize_multiplier > 1:\n",
- " continue\n",
- "\n",
" model_name = repr(model)\n",
" count_names[model_name] = count_names.get(model_name, -1) + 1\n",
" if count_names[model_name] > 0:\n",
" model_name += str(count_names[model_name])\n",
+ "\n",
+ " if add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)):\n",
+ " continue\n",
+ "\n",
" names.extend(model_name + n for n in model.loss.output_names)\n",
" return names\n",
"\n",
@@ -863,6 +866,7 @@
" verbose: bool = False,\n",
" engine = None,\n",
" level: Optional[List[Union[int, float]]] = None,\n",
+ " quantiles: Optional[List[float]] = None,\n",
" **data_kwargs\n",
" ):\n",
" \"\"\"Predict with core.NeuralForecast.\n",
@@ -886,6 +890,8 @@
" Distributed engine for inference. Only used if df is a spark dataframe or if fit was called on a spark dataframe.\n",
" level : list of ints or floats, optional (default=None)\n",
" Confidence levels between 0 and 100.\n",
+ " quantiles : list of floats, optional (default=None)\n",
+ " Alternative to level, target quantiles to predict.\n",
" data_kwargs : kwargs\n",
" Extra arguments to be passed to the dataset within each model.\n",
"\n",
@@ -900,6 +906,22 @@
"\n",
" if not self._fitted:\n",
" raise Exception(\"You must fit the model before predicting.\")\n",
+ " \n",
+ " quantiles_ = None\n",
+ " level_ = None\n",
+ " has_level = False \n",
+ " if level is not None:\n",
+ " has_level = True\n",
+ " if quantiles is not None:\n",
+ " raise ValueError(\"You can't set both level and quantiles.\")\n",
+ " level_ = sorted(list(set(level)))\n",
+ " quantiles_ = level_to_quantiles(level_)\n",
+ " \n",
+ " if quantiles is not None:\n",
+ " if level is not None:\n",
+ " raise ValueError(\"You can't set both level and quantiles.\") \n",
+ " quantiles_ = sorted(list(set(quantiles)))\n",
+ " level_ = quantiles_to_level(quantiles_)\n",
"\n",
" needed_futr_exog = self._get_needed_futr_exog()\n",
" if needed_futr_exog:\n",
@@ -949,8 +971,6 @@
" if verbose: print('Using stored dataset.')\n",
" \n",
"\n",
- " cols = self._get_model_names()\n",
- "\n",
" # Placeholder dataframe for predictions with unique_id and ds\n",
" fcsts_df = ufp.make_future_dataframe(\n",
" uids=uids,\n",
@@ -994,24 +1014,14 @@
" )\n",
" self._scalers_transform(futr_dataset)\n",
" dataset = dataset.append(futr_dataset)\n",
- "\n",
- " col_idx = 0\n",
- " fcsts = np.full((self.h * len(uids), len(cols)), fill_value=np.nan, dtype=np.float32)\n",
- " for model in self.models:\n",
- " old_test_size = model.get_test_size()\n",
- " model.set_test_size(self.h) # To predict h steps ahead\n",
- " model_fcsts = model.predict(dataset=dataset, **data_kwargs)\n",
- " # Append predictions in memory placeholder\n",
- " output_length = len(model.loss.output_names)\n",
- " fcsts[:, col_idx : col_idx + output_length] = model_fcsts\n",
- " col_idx += output_length\n",
- " model.set_test_size(old_test_size) # Set back to original value\n",
+ " \n",
+ " fcsts, cols = self._generate_forecasts(dataset=dataset, uids=uids, quantiles_=quantiles_, level_=level_, has_level=has_level, **data_kwargs)\n",
+ " \n",
" if self.scalers_:\n",
" indptr = np.append(0, np.full(len(uids), self.h).cumsum())\n",
" fcsts = self._scalers_target_inverse_transform(fcsts, indptr)\n",
"\n",
" # Declare predictions pd.DataFrame\n",
- " cols = self._get_model_names() # Needed for IQLoss as column names may have changed during the call to .predict()\n",
" if isinstance(fcsts_df, pl_DataFrame):\n",
" fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))\n",
" else:\n",
@@ -1021,25 +1031,6 @@
" _warn_id_as_idx()\n",
" fcsts_df = fcsts_df.set_index(self.id_col)\n",
"\n",
- " # add prediction intervals\n",
- " if level is not None:\n",
- " if self._cs_df is None or self.prediction_intervals is None:\n",
- " raise Exception('You must fit the model with prediction_intervals to use level.')\n",
- " else:\n",
- " level_ = sorted(level)\n",
- " model_names = self._get_model_names(add_level=True)\n",
- " prediction_interval_method = get_prediction_interval_method(self.prediction_intervals.method)\n",
- "\n",
- " fcsts_df = prediction_interval_method(\n",
- " fcsts_df,\n",
- " self._cs_df,\n",
- " model_names=list(model_names),\n",
- " level=level_,\n",
- " cs_n_windows=self.prediction_intervals.n_windows,\n",
- " n_series=len(uids),\n",
- " horizon=self.h,\n",
- " )\n",
- "\n",
" return fcsts_df\n",
"\n",
" def _reset_models(self):\n",
@@ -1085,15 +1076,6 @@
" if self.dataset.min_size < (val_size+test_size):\n",
" warnings.warn('Validation and test sets are larger than the shorter time-series.')\n",
"\n",
- " cols = []\n",
- " count_names = {'model': 0}\n",
- " for model in self.models:\n",
- " model_name = repr(model)\n",
- " count_names[model_name] = count_names.get(model_name, -1) + 1\n",
- " if count_names[model_name] > 0:\n",
- " model_name += str(count_names[model_name])\n",
- " cols += [model_name + n for n in model.loss.output_names]\n",
- "\n",
" fcsts_df = ufp.cv_times(\n",
" times=self.ds,\n",
" uids=self.uids,\n",
@@ -1107,20 +1089,20 @@
" # the cv_times is sorted by window and then id\n",
" fcsts_df = ufp.sort(fcsts_df, [id_col, 'cutoff', time_col])\n",
"\n",
- " col_idx = 0\n",
- " fcsts = np.full((self.dataset.n_groups * self.h * n_windows, len(cols)),\n",
- " np.nan, dtype=np.float32)\n",
- " \n",
+ " fcsts_list: List = []\n",
" for model in self.models:\n",
+ " if self._add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)):\n",
+ " continue\n",
+ "\n",
" model.fit(dataset=self.dataset,\n",
" val_size=val_size, \n",
" test_size=test_size)\n",
" model_fcsts = model.predict(self.dataset, step_size=step_size, **data_kwargs)\n",
"\n",
" # Append predictions in memory placeholder\n",
- " output_length = len(model.loss.output_names)\n",
- " fcsts[:,col_idx:(col_idx + output_length)] = model_fcsts\n",
- " col_idx += output_length\n",
+ " fcsts_list.append(model_fcsts)\n",
+ "\n",
+ " fcsts = np.concatenate(fcsts_list, axis=-1)\n",
" # we may have allocated more space than needed\n",
" # each serie can produce at most (serie.size - 1) // self.h CV windows\n",
" effective_sizes = ufp.counts_by_id(fcsts_df, id_col)['counts'].to_numpy()\n",
@@ -1148,6 +1130,7 @@
" self._fitted = True\n",
"\n",
" # Add predictions to forecasts DataFrame\n",
+ " cols = self._get_model_names(add_level=self._add_level)\n",
" if isinstance(self.uids, pl_Series):\n",
" fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))\n",
" else:\n",
@@ -1164,7 +1147,7 @@
" if isinstance(fcsts_df, pd.DataFrame) and _id_as_idx():\n",
" _warn_id_as_idx()\n",
" fcsts_df = fcsts_df.set_index(id_col)\n",
- " return fcsts_df\n",
+ " return fcsts_df \n",
"\n",
" def cross_validation(\n",
" self,\n",
@@ -1183,6 +1166,7 @@
" target_col: str = 'y',\n",
" prediction_intervals: Optional[PredictionIntervals] = None,\n",
" level: Optional[List[Union[int, float]]] = None,\n",
+ " quantiles: Optional[List[float]] = None,\n",
" **data_kwargs\n",
" ) -> DataFrame:\n",
" \"\"\"Temporal Cross-Validation with core.NeuralForecast.\n",
@@ -1224,7 +1208,9 @@
" prediction_intervals : PredictionIntervals, optional (default=None)\n",
" Configuration to calibrate prediction intervals (Conformal Prediction). \n",
" level : list of ints or floats, optional (default=None)\n",
- " Confidence levels between 0 and 100. Use with prediction_intervals. \n",
+ " Confidence levels between 0 and 100.\n",
+ " quantiles : list of floats, optional (default=None)\n",
+ "            Target quantiles to predict, as an alternative to level; level and quantiles cannot both be set.\n",
" data_kwargs : kwargs\n",
" Extra arguments to be passed to the dataset within each model.\n",
"\n",
@@ -1257,15 +1243,15 @@
" df = df.reset_index(id_col) \n",
"\n",
" # Checks for prediction intervals\n",
- " if prediction_intervals is not None or level is not None:\n",
- " if level is None:\n",
- " warnings.warn('Level not provided, using level=[90].')\n",
- " level = [90]\n",
- " if prediction_intervals is None:\n",
- " raise Exception('You must set prediction_intervals to use level.')\n",
+ " if prediction_intervals is not None:\n",
+ " if level is None and quantiles is None:\n",
+ " raise Exception('When passing prediction_intervals you need to set the level or quantiles argument.') \n",
" if not refit:\n",
- " raise Exception('Passing prediction_intervals and/or level is only supported with refit=True.') \n",
+ " raise Exception('Passing prediction_intervals is only supported with refit=True.') \n",
"\n",
+ " if level is not None and quantiles is not None:\n",
+ " raise ValueError(\"You can't set both level and quantiles argument.\")\n",
+ " \n",
" if not refit:\n",
"\n",
" return self._no_refit_cross_validation(\n",
@@ -1326,6 +1312,7 @@
" sort_df=sort_df,\n",
" verbose=verbose,\n",
" level=level,\n",
+ " quantiles=quantiles,\n",
" **data_kwargs\n",
" )\n",
" preds = ufp.join(preds, cutoffs, on=id_col, how='left')\n",
@@ -1347,7 +1334,7 @@
" out = out.set_index(id_col)\n",
" return out\n",
"\n",
- " def predict_insample(self, step_size: int = 1):\n",
+ " def predict_insample(self, step_size: int = 1, **data_kwargs):\n",
" \"\"\"Predict insample with core.NeuralForecast.\n",
"\n",
" `core.NeuralForecast`'s `predict_insample` uses stored fitted `models`\n",
@@ -1365,23 +1352,7 @@
" \"\"\"\n",
" if not self._fitted:\n",
" raise Exception('The models must be fitted first with `fit` or `cross_validation`.')\n",
- "\n",
- " for model in self.models:\n",
- " if model.SAMPLING_TYPE == 'recurrent':\n",
- " warnings.warn(f'Predict insample might not provide accurate predictions for \\\n",
- " recurrent model {repr(model)} class yet due to scaling.')\n",
- " print(f'WARNING: Predict insample might not provide accurate predictions for \\\n",
- " recurrent model {repr(model)} class yet due to scaling.')\n",
" \n",
- " cols = []\n",
- " count_names = {'model': 0}\n",
- " for model in self.models:\n",
- " model_name = repr(model)\n",
- " count_names[model_name] = count_names.get(model_name, -1) + 1\n",
- " if count_names[model_name] > 0:\n",
- " model_name += str(count_names[model_name])\n",
- " cols += [model_name + n for n in model.loss.output_names]\n",
- "\n",
" # Remove test set from dataset and last dates\n",
" test_size = self.models[0].get_test_size()\n",
"\n",
@@ -1417,9 +1388,7 @@
" time_col=self.time_col,\n",
" )\n",
"\n",
- " col_idx = 0\n",
- " fcsts = np.full((len(fcsts_df), len(cols)), np.nan, dtype=np.float32)\n",
- "\n",
+ " fcsts_list: List = []\n",
" for model in self.models:\n",
" # Test size is the number of periods to forecast (full size of trimmed dataset)\n",
" model.set_test_size(test_size=trimmed_dataset.max_size)\n",
@@ -1427,10 +1396,9 @@
" # Predict\n",
" model_fcsts = model.predict(trimmed_dataset, step_size=step_size)\n",
" # Append predictions in memory placeholder\n",
- " output_length = len(model.loss.output_names)\n",
- " fcsts[:,col_idx:(col_idx + output_length)] = model_fcsts\n",
- " col_idx += output_length \n",
+ " fcsts_list.append(model_fcsts) \n",
" model.set_test_size(test_size=test_size) # Set original test_size\n",
+ " fcsts = np.concatenate(fcsts_list, axis=-1)\n",
"\n",
" # original y\n",
" original_y = {\n",
@@ -1440,6 +1408,7 @@
" }\n",
"\n",
" # Add predictions to forecasts DataFrame\n",
+ " cols = self._get_model_names()\n",
" if isinstance(self.uids, pl_Series):\n",
" fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))\n",
" Y_df = pl_DataFrame(original_y)\n",
@@ -1703,6 +1672,7 @@
" \"Please reduce the number of windows, horizon or remove those series.\"\n",
" )\n",
" \n",
+ " self._add_level = True\n",
" cv_results = self.cross_validation(\n",
" df=df,\n",
" static_df=static_df,\n",
@@ -1711,7 +1681,8 @@
" time_col=time_col,\n",
" target_col=target_col,\n",
" )\n",
- " \n",
+ " self._add_level = False\n",
+ "\n",
" kept = [time_col, id_col, 'cutoff']\n",
" # conformity score for each model\n",
" for model in self._get_model_names(add_level=True):\n",
@@ -1721,7 +1692,102 @@
" abs_err = abs(cv_results[model] - cv_results[target_col])\n",
" cv_results = ufp.assign_columns(cv_results, model, abs_err)\n",
" dropped = list(set(cv_results.columns) - set(kept))\n",
- " return ufp.drop_columns(cv_results, dropped) "
+ " return ufp.drop_columns(cv_results, dropped) \n",
+ " \n",
+ " def _generate_forecasts(self, dataset: TimeSeriesDataset, uids: Series, quantiles_: Optional[List[float]] = None, level_: Optional[List[Union[int, float]]] = None, has_level: Optional[bool] = False, **data_kwargs) -> np.array:\n",
+ " fcsts_list: List = []\n",
+ " cols = []\n",
+ " count_names = {'model': 0}\n",
+ " for model in self.models:\n",
+ " old_test_size = model.get_test_size()\n",
+ " model.set_test_size(self.h) # To predict h steps ahead\n",
+ " \n",
+ " # Increment model name if the same model is used more than once\n",
+ " model_name = repr(model)\n",
+ " count_names[model_name] = count_names.get(model_name, -1) + 1\n",
+ " if count_names[model_name] > 0:\n",
+ " model_name += str(count_names[model_name])\n",
+ "\n",
+ " # Predict for every quantile or level if requested and the loss function supports it\n",
+ " # case 1: DistributionLoss and MixtureLosses\n",
+ " if quantiles_ is not None and not isinstance(model.loss, IQLoss) and hasattr(model.loss, 'update_quantile') and callable(model.loss.update_quantile):\n",
+ " model_fcsts = model.predict(dataset=dataset, quantiles = quantiles_, **data_kwargs)\n",
+ " fcsts_list.append(model_fcsts) \n",
+ " col_names = []\n",
+ " for i, quantile in enumerate(quantiles_):\n",
+ " col_name = self._get_column_name(model_name, quantile, has_level)\n",
+ " if i == 0:\n",
+ " col_names.extend([f\"{model_name}\", col_name])\n",
+ " else:\n",
+ " col_names.extend([col_name])\n",
+ " if hasattr(model.loss, 'return_params') and model.loss.return_params:\n",
+ " cols.extend(col_names + [model_name + param_name for param_name in model.loss.param_names])\n",
+ " else:\n",
+ " cols.extend(col_names)\n",
+ " # case 2: IQLoss\n",
+ " elif quantiles_ is not None and isinstance(model.loss, IQLoss):\n",
+ "                # IQLoss does not give monotonically increasing quantiles, so we apply a workaround: predict a fixed grid of 20 quantiles between 0.01 and 0.99, then take the requested empirical quantiles over those predictions\n",
+ " quantiles_iqloss = np.linspace(0.01, 0.99, 20)\n",
+ " fcsts_list_iqloss = []\n",
+ " for i, quantile in enumerate(quantiles_iqloss):\n",
+ " model_fcsts = model.predict(dataset=dataset, quantiles = [quantile], **data_kwargs) \n",
+ " fcsts_list_iqloss.append(model_fcsts) \n",
+ " fcsts_iqloss = np.concatenate(fcsts_list_iqloss, axis=-1)\n",
+ "\n",
+ " # Get the actual requested quantiles\n",
+ " model_fcsts = np.quantile(fcsts_iqloss, quantiles_, axis=-1).T\n",
+ " fcsts_list.append(model_fcsts) \n",
+ "\n",
+ " # Get the right column names\n",
+ " col_names = []\n",
+ " for i, quantile in enumerate(quantiles_):\n",
+ " col_name = self._get_column_name(model_name, quantile, has_level)\n",
+ " col_names.extend([col_name]) \n",
+ " cols.extend(col_names)\n",
+ " # case 3: PointLoss via prediction intervals\n",
+ " elif quantiles_ is not None and model.loss.outputsize_multiplier == 1:\n",
+ " if self.prediction_intervals is None:\n",
+ " raise AttributeError(\n",
+ " f\"You have trained {model_name} with loss={type(model.loss).__name__}(). \\n\"\n",
+ " \" You then must set `prediction_intervals` during fit to use level or quantiles during predict.\") \n",
+ " model_fcsts = model.predict(dataset=dataset, quantiles = quantiles_, **data_kwargs)\n",
+ " prediction_interval_method = get_prediction_interval_method(self.prediction_intervals.method)\n",
+ " fcsts_with_intervals, out_cols = prediction_interval_method(\n",
+ " model_fcsts,\n",
+ " self._cs_df,\n",
+ " model=model_name,\n",
+ " level=level_ if has_level else None,\n",
+ " cs_n_windows=self.prediction_intervals.n_windows,\n",
+ " n_series=len(uids),\n",
+ " horizon=self.h,\n",
+ " quantiles=quantiles_ if not has_level else None,\n",
+ " ) \n",
+ " fcsts_list.append(fcsts_with_intervals) \n",
+ " cols.extend([model_name] + out_cols)\n",
+ " # base case: quantiles or levels are not supported or provided as arguments\n",
+ " else:\n",
+ " model_fcsts = model.predict(dataset=dataset, **data_kwargs)\n",
+ " fcsts_list.append(model_fcsts)\n",
+ " cols.extend(model_name + n for n in model.loss.output_names)\n",
+ " model.set_test_size(old_test_size) # Set back to original value\n",
+ " fcsts = np.concatenate(fcsts_list, axis=-1)\n",
+ "\n",
+ " return fcsts, cols\n",
+ " \n",
+ " @staticmethod\n",
+ " def _get_column_name(model_name, quantile, has_level) -> str:\n",
+ " if not has_level:\n",
+ " col_name = f\"{model_name}_ql{quantile}\" \n",
+ " elif quantile < 0.5:\n",
+ " level_lo = int(round(100 - 200 * quantile))\n",
+ " col_name = f\"{model_name}-lo-{level_lo}\"\n",
+ " elif quantile > 0.5:\n",
+ " level_hi = int(round(100 - 200 * (1 - quantile)))\n",
+ " col_name = f\"{model_name}-hi-{level_hi}\"\n",
+ " else:\n",
+ " col_name = f\"{model_name}-median\"\n",
+ "\n",
+ " return col_name\n"
]
},
{
@@ -1849,7 +1915,7 @@
"from neuralforecast.models.tsmixer import TSMixer\n",
"from neuralforecast.models.tsmixerx import TSMixerx\n",
"\n",
- "from neuralforecast.losses.pytorch import MQLoss, MAE, MSE\n",
+ "from neuralforecast.losses.pytorch import MQLoss, MAE, MSE, DistributionLoss, IQLoss\n",
"from neuralforecast.utils import AirPassengersDF, AirPassengersPanel, AirPassengersStatic\n",
"\n",
"from datetime import date"
@@ -3465,6 +3531,71 @@
")\n",
"assert all([col in cv2.columns for col in ['NHITS-lo-30', 'NHITS-hi-30']])"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b82e7c70",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Test quantile and level argument in predict for different models and errors\n",
+ "prediction_intervals = PredictionIntervals(method=\"conformal_error\")\n",
+ "\n",
+ "models = []\n",
+ "for nf_model in [NHITS, LSTM, TSMixer]:\n",
+ " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1, \"loss\": MAE()}\n",
+ " if nf_model.__name__ == \"TSMixer\":\n",
+ " params.update({\"n_series\": 2})\n",
+ " models.append(nf_model(**params))\n",
+ "\n",
+ " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1, \"loss\": DistributionLoss(distribution=\"Normal\")}\n",
+ " if nf_model.__name__ == \"TSMixer\":\n",
+ " params.update({\"n_series\": 2})\n",
+ " models.append(nf_model(**params))\n",
+ "\n",
+ " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1, \"loss\": IQLoss()}\n",
+ " if nf_model.__name__ == \"TSMixer\":\n",
+ " params.update({\"n_series\": 2})\n",
+ " models.append(nf_model(**params))\n",
+ "\n",
+ "nf = NeuralForecast(models=models, freq='M')\n",
+ "nf.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)\n",
+ "# Test default prediction\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test)\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS1', 'NHITS1-median', 'NHITS1-lo-90',\n",
+ " 'NHITS1-lo-80', 'NHITS1-hi-80', 'NHITS1-hi-90', 'NHITS2_ql0.5', 'LSTM',\n",
+ " 'LSTM1', 'LSTM1-median', 'LSTM1-lo-90', 'LSTM1-lo-80', 'LSTM1-hi-80',\n",
+ " 'LSTM1-hi-90', 'LSTM2_ql0.5', 'TSMixer', 'TSMixer1', 'TSMixer1-median',\n",
+ " 'TSMixer1-lo-90', 'TSMixer1-lo-80', 'TSMixer1-hi-80', 'TSMixer1-hi-90',\n",
+ " 'TSMixer2_ql0.5']\n",
+ "# Test quantile prediction\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test, quantiles=[0.2, 0.3])\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS-ql0.2', 'NHITS-ql0.3', 'NHITS1',\n",
+ " 'NHITS1_ql0.2', 'NHITS1_ql0.3', 'NHITS2_ql0.2', 'NHITS2_ql0.3', 'LSTM',\n",
+ " 'LSTM-ql0.2', 'LSTM-ql0.3', 'LSTM1', 'LSTM1_ql0.2', 'LSTM1_ql0.3',\n",
+ " 'LSTM2_ql0.2', 'LSTM2_ql0.3', 'TSMixer', 'TSMixer-ql0.2',\n",
+ " 'TSMixer-ql0.3', 'TSMixer1', 'TSMixer1_ql0.2', 'TSMixer1_ql0.3',\n",
+ " 'TSMixer2_ql0.2', 'TSMixer2_ql0.3']\n",
+ "# Test level prediction\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test, level=[80, 90])\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS-lo-90', 'NHITS-lo-80', 'NHITS-hi-80',\n",
+ " 'NHITS-hi-90', 'NHITS1', 'NHITS1-lo-90', 'NHITS1-lo-80', 'NHITS1-hi-80',\n",
+ " 'NHITS1-hi-90', 'NHITS2-lo-90', 'NHITS2-lo-80', 'NHITS2-hi-80',\n",
+ " 'NHITS2-hi-90', 'LSTM', 'LSTM-lo-90', 'LSTM-lo-80', 'LSTM-hi-80',\n",
+ " 'LSTM-hi-90', 'LSTM1', 'LSTM1-lo-90', 'LSTM1-lo-80', 'LSTM1-hi-80',\n",
+ " 'LSTM1-hi-90', 'LSTM2-lo-90', 'LSTM2-lo-80', 'LSTM2-hi-80',\n",
+ " 'LSTM2-hi-90', 'TSMixer', 'TSMixer-lo-90', 'TSMixer-lo-80',\n",
+ " 'TSMixer-hi-80', 'TSMixer-hi-90', 'TSMixer1', 'TSMixer1-lo-90',\n",
+ " 'TSMixer1-lo-80', 'TSMixer1-hi-80', 'TSMixer1-hi-90', 'TSMixer2-lo-90',\n",
+ " 'TSMixer2-lo-80', 'TSMixer2-hi-80', 'TSMixer2-hi-90']\n",
+ "# Re-Test default prediction - note that they are different from the first test (this is expected)\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test)\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS1', 'NHITS1-median', 'NHITS2_ql0.5',\n",
+ " 'LSTM', 'LSTM1', 'LSTM1-median', 'LSTM2_ql0.5', 'TSMixer', 'TSMixer1',\n",
+ " 'TSMixer1-median', 'TSMixer2_ql0.5']"
+ ]
}
],
"metadata": {
diff --git a/nbs/docs/capabilities/01_overview.ipynb b/nbs/docs/capabilities/01_overview.ipynb
index 11b964a7f..de1f3e374 100644
--- a/nbs/docs/capabilities/01_overview.ipynb
+++ b/nbs/docs/capabilities/01_overview.ipynb
@@ -19,11 +19,11 @@
"|`BiTCN` | `AutoBiTCN` | CNN | Univariate | Direct | F/H/S | \n",
"|`DeepAR` | `AutoDeepAR` | RNN | Univariate | Recursive | F/S | \n",
"|`DeepNPTS` | `AutoDeepNPTS` | MLP | Univariate | Direct | F/H/S | \n",
- "|`DilatedRNN` | `AutoDilatedRNN` | RNN | Univariate | Recursive | F/H/S | \n",
+ "|`DilatedRNN` | `AutoDilatedRNN` | RNN | Univariate | Direct | F/H/S | \n",
"|`FEDformer` | `AutoFEDformer` | Transformer | Univariate | Direct | F | \n",
"|`GRU` | `AutoGRU` | RNN | Univariate | Recursive | F/H/S | \n",
"|`HINT` | `AutoHINT` | Any7 | Both7 | Both7 | F/H/S | \n",
- "|`Informer` | `AutoInformer` | Transformer | Multivariate | Direct | F | \n",
+ "|`Informer` | `AutoInformer` | Transformer | Univariate | Direct | F | \n",
"|`iTransformer` | `AutoiTransformer` | Transformer | Multivariate | Direct | - | \n",
"|`KAN` | `AutoKAN` | KAN | Univariate | Direct | F/H/S | \n",
"|`LSTM` | `AutoLSTM` | RNN | Univariate | Recursive | F/H/S | \n",
@@ -38,7 +38,7 @@
"|`RNN` | `AutoRNN` | RNN | Univariate | Recursive | F/H/S | \n",
"|`SOFTS` | `AutoSOFTS` | MLP | Multivariate | Direct | - | \n",
"|`StemGNN` | `AutoStemGNN` | GNN | Multivariate | Direct | - | \n",
- "|`TCN` | `AutoTCN` | CNN | Univariate | Recursive | F/H/S | \n",
+ "|`TCN` | `AutoTCN` | CNN | Univariate | Direct | F/H/S | \n",
"|`TFT` | `AutoTFT` | Transformer | Univariate | Direct | F/H/S | \n",
"|`TiDE` | `AutoTiDE` | MLP | Univariate | Direct | F/H/S | \n",
"|`TimeMixer` | `AutoTimeMixer` | MLP | Multivariate | Direct | - | \n",
diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb
index d8d333dd7..70cceb571 100644
--- a/nbs/losses.pytorch.ipynb
+++ b/nbs/losses.pytorch.ipynb
@@ -54,9 +54,8 @@
"outputs": [],
"source": [
"#| export\n",
- "from typing import Optional, Union, Tuple\n",
+ "from typing import Optional, Union, Tuple, List\n",
"\n",
- "import math\n",
"import numpy as np\n",
"import torch\n",
"\n",
@@ -70,6 +69,9 @@
" Poisson,\n",
" NegativeBinomial,\n",
" Beta,\n",
+ " Gamma,\n",
+ " MixtureSameFamily,\n",
+ " Categorical,\n",
" AffineTransform, \n",
" TransformedDistribution,\n",
")\n",
@@ -140,7 +142,7 @@
" `outputsize_multiplier`: Multiplier for the output size.
\n",
" `output_names`: Names of the outputs.
\n",
" \"\"\"\n",
- " def __init__(self, horizon_weight, outputsize_multiplier, output_names):\n",
+ " def __init__(self, horizon_weight=None, outputsize_multiplier=None, output_names=None):\n",
" super(BasePointLoss, self).__init__()\n",
" if horizon_weight is not None:\n",
" horizon_weight = torch.Tensor(horizon_weight.flatten())\n",
@@ -151,10 +153,13 @@
"\n",
" def domain_map(self, y_hat: torch.Tensor):\n",
" \"\"\"\n",
- " Univariate loss operates in dimension [B,T,H]/[B,H]\n",
- " This changes the network's output from [B,H,1]->[B,H]\n",
+ " Input:\n",
+ " Univariate: [B, H, 1]\n",
+ " Multivariate: [B, H, N]\n",
+ "\n",
+ " Output: [B, H, N]\n",
" \"\"\"\n",
- " return y_hat.squeeze(-1)\n",
+ " return y_hat\n",
"\n",
" def _compute_weights(self, y, mask):\n",
" \"\"\"\n",
@@ -163,16 +168,17 @@
" If set, check that it has the same length as the horizon in x.\n",
" \"\"\"\n",
" if mask is None:\n",
- " mask = torch.ones_like(y, device=y.device)\n",
+ " mask = torch.ones_like(y)\n",
"\n",
" if self.horizon_weight is None:\n",
- " self.horizon_weight = torch.ones(mask.shape[-1])\n",
+ " weights = torch.ones_like(mask)\n",
" else:\n",
- " assert mask.shape[-1] == len(self.horizon_weight), \\\n",
+ " assert mask.shape[1] == len(self.horizon_weight), \\\n",
" 'horizon_weight must have same length as Y'\n",
- "\n",
- " weights = self.horizon_weight.clone()\n",
- " weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)\n",
+ " weights = self.horizon_weight.clone()\n",
+ " weights = weights[None, :, None].to(mask.device)\n",
+ " weights = torch.ones_like(mask, device=mask.device) * weights\n",
+ " \n",
" return weights * mask"
]
},
@@ -227,7 +233,8 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " y_insample: Union[torch.Tensor, None] = None) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -311,7 +318,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -398,7 +407,8 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " y_insample: Union[torch.Tensor, None] = None) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -498,7 +508,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -590,7 +602,8 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " y_insample: Union[torch.Tensor, None] = None) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -685,12 +698,13 @@
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
" y_insample: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor (batch_size, output_size), Actual values.
\n",
" `y_hat`: tensor (batch_size, output_size)), Predicted values.
\n",
- " `y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n",
+ " `y_insample`: tensor (batch_size, input_size), Actual insample values.
\n",
" `mask`: tensor, Specifies date stamps per serie to consider in loss.
\n",
"\n",
" **Returns:**
\n",
@@ -699,7 +713,7 @@
" delta_y = torch.abs(y - y_hat)\n",
" scale = torch.mean(torch.abs(y_insample[:, self.seasonality:] - \\\n",
" y_insample[:, :-self.seasonality]), axis=1)\n",
- " losses = _divide_no_nan(delta_y, scale[:, None])\n",
+ " losses = _divide_no_nan(delta_y, scale[:, None, None])\n",
" weights = self._compute_weights(y=y, mask=mask)\n",
" return _weighted_mean(losses=losses, weights=weights)"
]
@@ -754,11 +768,11 @@
" \"\"\"Relative Mean Squared Error\n",
" Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n",
" as an alternative to percentage errors, to avoid measure unstability.\n",
- " $$ \\mathrm{relMSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}, \\\\mathbf{\\hat{y}}^{naive1}) =\n",
- " \\\\frac{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}^{naive1})} $$\n",
+ " $$ \\mathrm{relMSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}, \\\\mathbf{\\hat{y}}^{benchmark}) =\n",
+ " \\\\frac{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}^{benchmark})} $$\n",
"\n",
" **Parameters:**
\n",
- " `y_train`: numpy array, Training values.
\n",
+ " `y_train`: numpy array, deprecated.
\n",
" `horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n",
"\n",
" **References:**
\n",
@@ -769,32 +783,31 @@
" \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n",
" Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)\n",
" \"\"\"\n",
- " def __init__(self, y_train, horizon_weight=None):\n",
+ " def __init__(self, y_train=None, horizon_weight=None):\n",
" super(relMSE, self).__init__(horizon_weight=horizon_weight,\n",
" outputsize_multiplier=1,\n",
" output_names=[''])\n",
- " self.y_train = y_train\n",
+ " if y_train is not None:\n",
+ " raise DeprecationWarning(\"y_train will be deprecated in a future release.\")\n",
" self.mse = MSE(horizon_weight=horizon_weight)\n",
"\n",
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_benchmark: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor (batch_size, output_size), Actual values.
\n",
" `y_hat`: tensor (batch_size, output_size)), Predicted values.
\n",
- " `y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
\n",
+ " `y_benchmark`: tensor (batch_size, output_size), Benchmark predicted values.
\n",
" `mask`: tensor, Specifies date stamps per serie to consider in loss.
\n",
"\n",
" **Returns:**
\n",
" `relMSE`: tensor (single value).\n",
" \"\"\"\n",
- " horizon = y.shape[-1]\n",
- " last_col = self.y_train[:, -1].unsqueeze(1)\n",
- " y_naive = last_col.repeat(1, horizon)\n",
- "\n",
- " norm = self.mse(y=y, y_hat=y_naive, mask=mask) # Already weighted\n",
+ " norm = self.mse(y=y, y_hat=y_benchmark, mask=mask) # Already weighted\n",
" norm = norm + 1e-5 # Numerical stability\n",
" loss = self.mse(y=y, y_hat=y_hat, mask=mask) # Already weighted\n",
" loss = _divide_no_nan(loss, norm)\n",
@@ -880,7 +893,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -1022,35 +1037,47 @@
"\n",
" def domain_map(self, y_hat: torch.Tensor):\n",
" \"\"\"\n",
- " Identity domain map [B,T,H,Q]/[B,H,Q]\n",
+ " Input:\n",
+ " Univariate: [B, H, 1 * Q]\n",
+ " Multivariate: [B, H, N * Q]\n",
+ "\n",
+ " Output: [B, H, N, Q]\n",
" \"\"\"\n",
- " return y_hat\n",
- " \n",
+ " output = y_hat.reshape(y_hat.shape[0],\n",
+ " y_hat.shape[1],\n",
+ " -1,\n",
+ " self.outputsize_multiplier)\n",
+ "\n",
+ " return output\n",
+ "\n",
" def _compute_weights(self, y, mask):\n",
" \"\"\"\n",
" Compute final weights for each datapoint (based on all weights and all masks)\n",
" Set horizon_weight to a ones[H] tensor if not set.\n",
" If set, check that it has the same length as the horizon in x.\n",
+ "\n",
+ " y: [B, h, N, 1]\n",
+ " mask: [B, h, N, 1]\n",
" \"\"\"\n",
- " if mask is None:\n",
- " mask = torch.ones_like(y, device=y.device)\n",
- " else:\n",
- " mask = mask.unsqueeze(1) # Add Q dimension.\n",
"\n",
" if self.horizon_weight is None:\n",
- " self.horizon_weight = torch.ones(mask.shape[-1])\n",
+ " weights = torch.ones_like(mask)\n",
" else:\n",
- " assert mask.shape[-1] == len(self.horizon_weight), \\\n",
- " 'horizon_weight must have same length as Y'\n",
- " \n",
- " weights = self.horizon_weight.clone()\n",
- " weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)\n",
+ " assert mask.shape[1] == len(self.horizon_weight), \\\n",
+ " 'horizon_weight must have same length as Y' \n",
+ " weights = self.horizon_weight.clone()\n",
+ " weights = weights[None, :, None, None]\n",
+ " weights = weights.to(mask.device)\n",
+ " weights = torch.ones_like(mask, device=mask.device) * weights\n",
+ " \n",
" return weights * mask\n",
"\n",
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -1060,20 +1087,24 @@
" **Returns:**
\n",
" `mqloss`: tensor (single value).\n",
" \"\"\"\n",
- " \n",
- " error = y_hat - y.unsqueeze(-1)\n",
- " sq = torch.maximum(-error, torch.zeros_like(error))\n",
- " s1_q = torch.maximum(error, torch.zeros_like(error))\n",
- " losses = (1/len(self.quantiles))*(self.quantiles * sq + (1 - self.quantiles) * s1_q)\n",
+ " # [B, h, N] -> [B, h, N, 1]\n",
+ " if y_hat.ndim == 3:\n",
+ " y_hat = y_hat.unsqueeze(-1)\n",
+ "\n",
+ " y = y.unsqueeze(-1)\n",
+ " if mask is not None:\n",
+ " mask = mask.unsqueeze(-1)\n",
+ " else:\n",
+ " mask = torch.ones_like(y, device=y.device)\n",
"\n",
- " if y_hat.ndim == 3: # BaseWindows\n",
- " losses = losses.swapaxes(-2,-1) # [B,H,Q] -> [B,Q,H] (needed for horizon weighting, H at the end)\n",
- " elif y_hat.ndim == 4: # BaseRecurrent\n",
- " losses = losses.swapaxes(-2,-1)\n",
- " losses = losses.swapaxes(-2,-3) # [B,seq_len,H,Q] -> [B,Q,seq_len,H] (needed for horizon weighting, H at the end)\n",
+ " error = y_hat - y\n",
"\n",
+ " sq = torch.maximum(-error, torch.zeros_like(error))\n",
+ " s1_q = torch.maximum(error, torch.zeros_like(error))\n",
+ " \n",
+ " quantiles = self.quantiles[None, None, None, :]\n",
+ " losses = (1 / len(quantiles)) * (quantiles * sq + (1 - quantiles) * s1_q)\n",
" weights = self._compute_weights(y=losses, mask=mask) # Use losses for extra dim\n",
- " # NOTE: Weights do not have Q dimension.\n",
"\n",
" return _weighted_mean(losses=losses, weights=weights)"
]
@@ -1228,9 +1259,9 @@
" self.sampling_distr = Beta(concentration0 = concentration0,\n",
" concentration1 = concentration1)\n",
"\n",
- " def update_quantile(self, q: float = 0.5):\n",
- " self.q = q\n",
- " self.output_names = [f\"_ql{q}\"]\n",
+ " def update_quantile(self, q: List[float] = [0.5]):\n",
+ " self.q = q[0]\n",
+ " self.output_names = [f\"_ql{q[0]}\"]\n",
" self.has_predicted = True\n",
"\n",
" def domain_map(self, y_hat):\n",
@@ -1239,9 +1270,8 @@
"\n",
" Input shapes to this function:\n",
" \n",
- " base_windows: y_hat = [B, h, 1] \n",
- " base_multivariate: y_hat = [B, h, n_series]\n",
- " base_recurrent: y_hat = [B, seq_len, h, n_series]\n",
+ " Univariate: y_hat = [B, h, 1] \n",
+ " Multivariate: y_hat = [B, h, N]\n",
" \"\"\"\n",
" if self.eval() and self.has_predicted:\n",
" quantiles = torch.full(size=y_hat.shape, \n",
@@ -1259,7 +1289,7 @@
" emb_outputs = self.output_layer(emb_inputs)\n",
" \n",
" # Domain map\n",
- " y_hat = emb_outputs.squeeze(-1).squeeze(-1)\n",
+ " y_hat = emb_outputs.squeeze(-1)\n",
"\n",
" return y_hat\n"
]
@@ -1299,7 +1329,7 @@
"\n",
"# Check that quantiles are correctly updated - prediction\n",
"check = IQLoss()\n",
- "check.update_quantile(0.7)\n",
+ "check.update_quantile([0.7])\n",
"test_eq(check.q, 0.7)"
]
},
@@ -1357,19 +1387,6 @@
"outputs": [],
"source": [
"#| exporti\n",
- "def bernoulli_domain_map(input: torch.Tensor):\n",
- " \"\"\" Bernoulli Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:**
\n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
\n",
- "\n",
- " **Returns:**
\n",
- " `(probs,)`: tuple with tensors of Poisson distribution arguments.
\n",
- " \"\"\"\n",
- " return (input.squeeze(-1),)\n",
- "\n",
"def bernoulli_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\" Bernoulli Scale Decouple\n",
"\n",
@@ -1383,21 +1400,6 @@
" probs = F.sigmoid(probs)#.clone()\n",
" return (probs,)\n",
"\n",
- "def student_domain_map(input: torch.Tensor):\n",
- " \"\"\" Student T Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:**
\n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
\n",
- " `eps`: float, helps the initialization of scale for easier optimization.
\n",
- "\n",
- " **Returns:**
\n",
- " `(df, loc, scale)`: tuple with tensors of StudentT distribution arguments.
\n",
- " \"\"\"\n",
- " df, loc, scale = torch.tensor_split(input, 3, dim=-1)\n",
- " return df.squeeze(-1), loc.squeeze(-1), scale.squeeze(-1)\n",
- "\n",
"def student_scale_decouple(output, loc=None, scale=None, eps: float=0.1):\n",
" \"\"\" Normal Scale Decouple\n",
"\n",
@@ -1413,21 +1415,6 @@
" df = 3.0 + F.softplus(df)\n",
" return (df, mean, tscale)\n",
"\n",
- "def normal_domain_map(input: torch.Tensor):\n",
- " \"\"\" Normal Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:**
\n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
\n",
- " `eps`: float, helps the initialization of scale for easier optimization.
\n",
- "\n",
- " **Returns:**
\n",
- " `(mean, std)`: tuple with tensors of Normal distribution arguments.
\n",
- " \"\"\"\n",
- " mean, std = torch.tensor_split(input, 2, dim=-1)\n",
- " return mean.squeeze(-1), std.squeeze(-1)\n",
- "\n",
"def normal_scale_decouple(output, loc=None, scale=None, eps: float=0.2):\n",
" \"\"\" Normal Scale Decouple\n",
"\n",
@@ -1442,19 +1429,6 @@
" std = (std + eps) * scale\n",
" return (mean, std)\n",
"\n",
- "def poisson_domain_map(input: torch.Tensor):\n",
- " \"\"\" Poisson Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:**
\n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
\n",
- "\n",
- " **Returns:**
\n",
- " `(rate,)`: tuple with tensors of Poisson distribution arguments.
\n",
- " \"\"\"\n",
- " return (input.squeeze(-1),)\n",
- "\n",
"def poisson_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\" Poisson Scale Decouple\n",
"\n",
@@ -1467,21 +1441,7 @@
" if (loc is not None) and (scale is not None):\n",
" rate = (rate * scale) + loc\n",
" rate = F.softplus(rate) + eps\n",
- " return (rate,)\n",
- "\n",
- "def nbinomial_domain_map(input: torch.Tensor):\n",
- " \"\"\" Negative Binomial Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:**
\n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
\n",
- "\n",
- " **Returns:**
\n",
- " `(total_count, alpha)`: tuple with tensors of N.Binomial distribution arguments.
\n",
- " \"\"\"\n",
- " mu, alpha = torch.tensor_split(input, 2, dim=-1)\n",
- " return mu.squeeze(-1), alpha.squeeze(-1)\n",
+ " return (rate, )\n",
"\n",
"def nbinomial_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\" Negative Binomial Scale Decouple\n",
@@ -1550,10 +1510,12 @@
" - [Jorgensen, B. (1987). Exponential Dispersion Models. Journal of the Royal Statistical Society. \n",
" Series B (Methodological), 49(2), 127–162. http://www.jstor.org/stable/2345415](http://www.jstor.org/stable/2345415)
\n",
" \"\"\"\n",
+ " arg_constraints = {'log_mu': constraints.real}\n",
+ " support = constraints.nonnegative\n",
+ "\n",
" def __init__(self, log_mu, rho, validate_args=None):\n",
" # TODO: add sigma2 dispersion\n",
" # TODO add constraints\n",
- " # arg_constraints = {'log_mu': constraints.real, 'rho': constraints.positive}\n",
" # support = constraints.real\n",
" self.log_mu = log_mu\n",
" self.rho = rho\n",
@@ -1587,7 +1549,7 @@
" beta = beta.expand(shape)\n",
"\n",
" N = torch.poisson(rate) + 1e-5\n",
- " gamma = torch.distributions.gamma.Gamma(N * alpha, beta)\n",
+ " gamma = Gamma(N*alpha, beta)\n",
" samples = gamma.sample()\n",
" samples[N==0] = 0\n",
"\n",
@@ -1602,12 +1564,12 @@
"\n",
" return a - b\n",
"\n",
- "def tweedie_domain_map(input: torch.Tensor):\n",
+ "def tweedie_domain_map(input: torch.Tensor, rho: float = 1.5):\n",
" \"\"\"\n",
" Maps output of neural network to domain of distribution loss\n",
"\n",
" \"\"\"\n",
- " return (input.squeeze(-1),)\n",
+ " return (input, rho)\n",
"\n",
"def tweedie_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\"Tweedie Scale Decouple\n",
@@ -1616,14 +1578,14 @@
" count and logits based on anchoring `loc`, `scale`.\n",
" Also adds Tweedie domain protection to the distribution parameters.\n",
" \"\"\"\n",
- " log_mu = output[0]\n",
+ " log_mu, rho = output\n",
" log_mu = F.softplus(log_mu)\n",
" log_mu = torch.clamp(log_mu, 1e-9, 37)\n",
" if (loc is not None) and (scale is not None):\n",
" log_mu += torch.log(loc)\n",
"\n",
" log_mu = torch.clamp(log_mu, 1e-9, 37)\n",
- " return (log_mu,)"
+ " return (log_mu, rho)"
]
},
{
@@ -1687,6 +1649,15 @@
" scale *= t.scale\n",
" p = self.base_dist.crps(z)\n",
" return p * scale\n",
+ " \n",
+ " @property\n",
+ " def mean(self):\n",
+ " \"\"\"\n",
+ " Function used to compute the empirical mean\n",
+ " \"\"\"\n",
+ " samples = self.sample([1000])\n",
+ " return samples.mean(dim=0)\n",
+ " \n",
"\n",
"class BaseISQF(Distribution):\n",
" \"\"\"\n",
@@ -2357,7 +2328,7 @@
" last dimension is of matching `distr_args` length.\n",
"\n",
" **Parameters:**
\n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
\n",
+ " `input`: tensor, of dimensions [B, H, N * n_outputs].
\n",
" `tol`: float, tolerance.
\n",
" `quantiles`: tensor, quantiles used for ISQF (i.e. x-positions for the knots).
\n",
" `num_pieces`: int, num_pieces used for each quantile spline.
\n",
@@ -2371,7 +2342,14 @@
" #\n",
" # Because in this case the spline knots could be squeezed together\n",
" # and cause overflow in spline CRPS computation\n",
- " num_qk = len(quantiles) \n",
+ " num_qk = len(quantiles)\n",
+ " n_outputs = 2 * (num_qk - 1) * num_pieces + 2 + num_qk\n",
+ " \n",
+ " # Reshape: [B, h, N * n_outputs] -> [B, h, N, n_outputs]\n",
+ " input = input.reshape(input.shape[0],\n",
+ " input.shape[1],\n",
+ " -1,\n",
+ " n_outputs)\n",
" start_index = 0\n",
" spline_knots = input[..., start_index: start_index + (num_qk - 1) * num_pieces]\n",
" start_index += (num_qk - 1) * num_pieces\n",
@@ -2381,27 +2359,19 @@
" start_index += 1\n",
" beta_r = input[..., start_index: start_index + 1]\n",
" start_index += 1\n",
- " quantile_knots = input[..., start_index: start_index + num_qk]\n",
- "\n",
- " qk_y = torch.cat(\n",
- " [\n",
- " quantile_knots[..., 0:1],\n",
- " torch.abs(quantile_knots[..., 1:]) + tol,\n",
- " ],\n",
- " dim=-1,\n",
- " )\n",
- " qk_y = torch.cumsum(qk_y, dim=-1)\n",
+ " quantile_knots = F.softplus(input[..., start_index: start_index + num_qk]) + tol\n",
+ "\n",
+ " qk_y = torch.cumsum(quantile_knots, dim=-1)\n",
"\n",
" # Prevent overflow when we compute 1/beta\n",
- " beta_l = torch.abs(beta_l.squeeze(-1)) + tol\n",
- " beta_r = torch.abs(beta_r.squeeze(-1)) + tol\n",
+ " beta_l = F.softplus(beta_l.squeeze(-1)) + tol\n",
+ " beta_r = F.softplus(beta_r.squeeze(-1)) + tol\n",
"\n",
" # Reshape spline arguments\n",
" batch_shape = spline_knots.shape[:-1]\n",
"\n",
" # repeat qk_x from (num_qk,) to (*batch_shape, num_qk)\n",
- " qk_x_repeat = torch.sort(quantiles)\\\n",
- " .values\\\n",
+ " qk_x_repeat = quantiles\\\n",
" .repeat(*batch_shape, 1)\\\n",
" .to(input.device)\n",
"\n",
@@ -2502,15 +2472,6 @@
" NegativeBinomial=NegativeBinomial,\n",
" Tweedie=Tweedie,\n",
" ISQF=ISQF)\n",
- " domain_maps = dict(Bernoulli=bernoulli_domain_map,\n",
- " Normal=normal_domain_map,\n",
- " Poisson=poisson_domain_map,\n",
- " StudentT=student_domain_map,\n",
- " NegativeBinomial=nbinomial_domain_map,\n",
- " Tweedie=tweedie_domain_map,\n",
- " ISQF=partial(isqf_domain_map, \n",
- " quantiles=qs, \n",
- " num_pieces=num_pieces))\n",
" scale_decouples = dict(\n",
" Bernoulli=bernoulli_scale_decouple,\n",
" Normal=normal_scale_decouple,\n",
@@ -2531,9 +2492,24 @@
" [f\"-quantile_knot_{i + 1}\" for i in range(num_qk)],\n",
" )\n",
" assert (distribution in available_distributions.keys()), f'{distribution} not available'\n",
+ " if distribution == 'ISQF':\n",
+ " quantiles = torch.sort(qs).values\n",
+ " self.domain_map = partial(isqf_domain_map, \n",
+ " quantiles=quantiles, \n",
+ " num_pieces=num_pieces)\n",
+ " if return_params:\n",
+ " raise Exception(\"ISQF does not support 'return_params=True'\") \n",
+ " elif distribution == 'Tweedie':\n",
+ " rho = distribution_kwargs.pop(\"rho\")\n",
+ " self.domain_map = partial(tweedie_domain_map,\n",
+ " rho=rho)\n",
+ " if return_params:\n",
+ " raise Exception(\"Tweedie does not support 'return_params=True'\") \n",
+ " else:\n",
+ " self.domain_map = self._domain_map\n",
+ "\n",
" self.distribution = distribution\n",
" self._base_distribution = available_distributions[distribution]\n",
- " self.domain_map = domain_maps[distribution]\n",
" self.scale_decouple = scale_decouples[distribution]\n",
" self.distribution_kwargs = distribution_kwargs\n",
" self.num_samples = num_samples \n",
@@ -2549,6 +2525,16 @@
"\n",
" self.outputsize_multiplier = len(self.param_names)\n",
" self.is_distribution_output = True\n",
+ " self.has_predicted = False\n",
+ "\n",
+ " def _domain_map(self, input: torch.Tensor):\n",
+ " \"\"\"\n",
+ " Maps output of neural network to domain of distribution loss\n",
+ "\n",
+ " \"\"\"\n",
+ " output = torch.tensor_split(input, self.outputsize_multiplier, dim=2)\n",
+ "\n",
+ " return output\n",
"\n",
" def get_distribution(self, distr_args, **distribution_kwargs) -> Distribution:\n",
" \"\"\"\n",
@@ -2561,10 +2547,10 @@
" **Returns**
\n",
" `Distribution`: AffineTransformed distribution.
\n",
" \"\"\"\n",
- " # TransformedDistribution(distr, [AffineTransform(loc=loc, scale=scale)])\n",
" distr = self._base_distribution(*distr_args, **distribution_kwargs)\n",
+ " self.distr_mean = distr.mean\n",
" \n",
- " if self.distribution =='Poisson':\n",
+ " if self.distribution in ('Poisson', 'NegativeBinomial'):\n",
" distr.support = constraints.nonnegative\n",
" return distr\n",
"\n",
@@ -2577,7 +2563,7 @@
"\n",
" **Parameters**
\n",
" `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
- " `num_samples`: int=500, overwrite number of samples for the empirical quantiles.
\n",
+ " `num_samples`: int, overwrite number of samples for the empirical quantiles.
\n",
"\n",
" **Returns**
\n",
" `samples`: tensor, shape [B,H,`num_samples`].
\n",
@@ -2586,30 +2572,31 @@
" if num_samples is None:\n",
" num_samples = self.num_samples\n",
"\n",
- " # print(distr_args[0].size())\n",
- " B, H = distr_args[0].shape[:2]\n",
- " Q = len(self.quantiles)\n",
- "\n",
" # Instantiate Scaled Decoupled Distribution\n",
" distr = self.get_distribution(distr_args=distr_args, **self.distribution_kwargs)\n",
" samples = distr.sample(sample_shape=(num_samples,))\n",
- " samples = samples.permute(1,2,0) # [samples,B,H] -> [B,H,samples]\n",
- " samples = samples.view(B*H, num_samples)\n",
- " sample_mean = torch.mean(samples, dim=-1)\n",
+ " samples = samples.permute(1, 2, 3, 0) # [samples, B, H, N] -> [B, H, N, samples]\n",
+ "\n",
+ " sample_mean = torch.mean(samples, dim=-1, keepdim=True) \n",
"\n",
" # Compute quantiles\n",
" quantiles_device = self.quantiles.to(distr_args[0].device)\n",
" quants = torch.quantile(input=samples, \n",
- " q=quantiles_device, dim=1)\n",
- " quants = quants.permute((1,0)) # [Q, B*H] -> [B*H, Q]\n",
- "\n",
- " # Final reshapes\n",
- " samples = samples.view(B, H, num_samples)\n",
- " sample_mean = sample_mean.view(B, H, 1)\n",
- " quants = quants.view(B, H, Q)\n",
+ " q=quantiles_device, \n",
+ " dim=-1)\n",
+ " quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]\n",
"\n",
" return samples, sample_mean, quants\n",
"\n",
+ " def update_quantile(self, q: Optional[List[float]] = None):\n",
+ " if q is not None:\n",
+ " self.quantiles = nn.Parameter(torch.tensor(q, dtype=torch.float32), requires_grad=False)\n",
+ " self.output_names = [\"\"] + [f\"_ql{q_i}\" for q_i in q] + self.return_params * self.param_names\n",
+ " self.has_predicted = True\n",
+ " elif q is None and self.has_predicted:\n",
+ " self.quantiles = nn.Parameter(torch.tensor([0.5], dtype=torch.float32), requires_grad=False)\n",
+ " self.output_names = [\"\", \"-median\"] + self.return_params * self.param_names\n",
+ "\n",
" def __call__(self,\n",
" y: torch.Tensor,\n",
" distr_args: torch.Tensor,\n",
@@ -2626,10 +2613,6 @@
" **Parameters**
\n",
" `y`: tensor, Actual values.
\n",
" `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
- " `loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n",
- " of the resulting distribution.
\n",
- " `scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n",
- " of the resulting distribution.
\n",
" `mask`: tensor, Specifies date stamps per serie to consider in loss.
\n",
"\n",
" **Returns**
\n",
@@ -2739,7 +2722,8 @@
" \"\"\"\n",
" def __init__(self, n_components=10, level=[80, 90], quantiles=None,\n",
" num_samples=1000, return_params=False,\n",
- " batch_correlation=False, horizon_correlation=False):\n",
+ " batch_correlation=False, horizon_correlation=False, \n",
+ " weighted=False):\n",
" super(PMM, self).__init__()\n",
" # Transform level to MQLoss parameters\n",
" qs, self.output_names = level_to_outputs(level)\n",
@@ -2753,21 +2737,37 @@
" self.num_samples = num_samples\n",
" self.batch_correlation = batch_correlation\n",
" self.horizon_correlation = horizon_correlation\n",
+ " self.weighted = weighted \n",
"\n",
" # If True, predict_step will return Distribution's parameters\n",
" self.return_params = return_params\n",
- " if self.return_params:\n",
- " self.param_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n",
+ "\n",
+ " lambda_names = [f\"-lambda-{i}\" for i in range(1, n_components + 1)]\n",
+ " if weighted:\n",
+ " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n",
+ " self.param_names = [i for j in zip(lambda_names, weight_names) for i in j]\n",
+ " else:\n",
+ " self.param_names = lambda_names\n",
+ "\n",
+ " if self.return_params: \n",
" self.output_names = self.output_names + self.param_names\n",
"\n",
" # Add first output entry for the sample_mean\n",
" self.output_names.insert(0, \"\")\n",
"\n",
- " self.outputsize_multiplier = n_components\n",
+ " self.n_outputs = 1 + weighted\n",
+ " self.n_components = n_components\n",
+ " self.outputsize_multiplier = self.n_outputs * n_components\n",
" self.is_distribution_output = True\n",
+ " self.has_predicted = False\n",
"\n",
" def domain_map(self, output: torch.Tensor):\n",
- " return (output,)#, weights\n",
+ " output = output.reshape(output.shape[0],\n",
+ " output.shape[1],\n",
+ " -1,\n",
+ " self.outputsize_multiplier)\n",
+ " \n",
+ " return torch.tensor_split(output, self.n_outputs, dim=-1)\n",
" \n",
" def scale_decouple(self, \n",
" output,\n",
@@ -2779,26 +2779,62 @@
" variance and residual location based on anchoring `loc`, `scale`.\n",
" Also adds domain protection to the distribution parameters.\n",
" \"\"\"\n",
- " lambdas = output[0]\n",
+ " if self.weighted:\n",
+ " lambdas, weights = output\n",
+ " weights = F.softmax(weights, dim=-1)\n",
+ " else:\n",
+ " lambdas = output[0]\n",
+ "\n",
" if (loc is not None) and (scale is not None):\n",
- " loc = loc.view(lambdas.size(dim=0), 1, -1)\n",
- " scale = scale.view(lambdas.size(dim=0), 1, -1)\n",
+ " if loc.ndim == 3:\n",
+ " loc = loc.unsqueeze(-1)\n",
+ " scale = scale.unsqueeze(-1)\n",
" lambdas = (lambdas * scale) + loc\n",
- " lambdas = F.softplus(lambdas)\n",
- " return (lambdas,)\n",
"\n",
- " def sample(self, distr_args, num_samples=None):\n",
+ " lambdas = F.softplus(lambdas) + 1e-3\n",
+ " \n",
+ " if self.weighted:\n",
+ " return (lambdas, weights)\n",
+ " else:\n",
+ " return (lambdas, )\n",
+ " \n",
+ " def get_distribution(self, distr_args) -> Distribution:\n",
+ " \"\"\"\n",
+ " Construct the associated PyTorch mixture Distribution, given the collection of\n",
+ " constructor arguments (component rates and, when `weighted`, mixture weights).\n",
+ "\n",
+ " **Parameters**
\n",
+ " `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
+ "\n",
+ " **Returns**
\n",
+ " `Distribution`: MixtureSameFamily distribution with Poisson components.
\n",
+ " \"\"\"\n",
+ " if self.weighted:\n",
+ " lambdas, weights = distr_args\n",
+ " else:\n",
+ " lambdas = distr_args[0]\n",
+ " weights = torch.full_like(lambdas, fill_value=1 / self.n_components)\n",
+ "\n",
+ " mix = Categorical(weights)\n",
+ " components = Poisson(rate=lambdas)\n",
+ " components.support = constraints.nonnegative\n",
+ " distr = MixtureSameFamily(mixture_distribution=mix,\n",
+ " component_distribution=components) \n",
+ "\n",
+ " self.distr_mean = distr.mean\n",
+ " \n",
+ " return distr\n",
+ "\n",
+ " def sample(self,\n",
+ " distr_args: torch.Tensor,\n",
+ " num_samples: Optional[int] = None):\n",
" \"\"\"\n",
" Construct the empirical quantiles from the estimated Distribution,\n",
" sampling from it `num_samples` independently.\n",
"\n",
" **Parameters**
\n",
" `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
- " `loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n",
- " of the resulting distribution.
\n",
- " `scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n",
- " of the resulting distribution.
\n",
- " `num_samples`: int=500, overwrites number of samples for the empirical quantiles.
\n",
+ " `num_samples`: int, overwrite number of samples for the empirical quantiles.
\n",
"\n",
" **Returns**
\n",
" `samples`: tensor, shape [B,H,`num_samples`].
\n",
@@ -2807,93 +2843,65 @@
" if num_samples is None:\n",
" num_samples = self.num_samples\n",
"\n",
- " lambdas = distr_args[0]\n",
- " B, H, K = lambdas.size()\n",
- " Q = len(self.quantiles)\n",
- "\n",
- " # Sample K ~ Mult(weights)\n",
- " # shared across B, H\n",
- " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n",
- " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n",
- "\n",
- " # Avoid loop, vectorize\n",
- " weights = weights.reshape(-1, K)\n",
- " lambdas = lambdas.flatten() \n",
- "\n",
- " # Vectorization trick to recover row_idx\n",
- " sample_idxs = torch.multinomial(input=weights, \n",
- " num_samples=num_samples,\n",
- " replacement=True)\n",
- " aux_col_idx = torch.unsqueeze(torch.arange(B * H, device=lambdas.device), -1) * K\n",
- "\n",
- " # To device\n",
- " sample_idxs = sample_idxs.to(lambdas.device)\n",
- "\n",
- " sample_idxs = sample_idxs + aux_col_idx\n",
- " sample_idxs = sample_idxs.flatten()\n",
- "\n",
- " sample_lambdas = lambdas[sample_idxs]\n",
+ " # Instantiate Scaled Decoupled Distribution\n",
+ " distr = self.get_distribution(distr_args=distr_args)\n",
+ " samples = distr.sample(sample_shape=(num_samples,))\n",
+ " samples = samples.permute(1, 2, 3, 0) # [samples, B, H, N] -> [B, H, N, samples]\n",
"\n",
- " # Sample y ~ Poisson(lambda) independently\n",
- " samples = torch.poisson(sample_lambdas).to(lambdas.device)\n",
- " samples = samples.view(B*H, num_samples)\n",
- " sample_mean = torch.mean(samples, dim=-1)\n",
+ " sample_mean = torch.mean(samples, dim=-1, keepdim=True) \n",
"\n",
" # Compute quantiles\n",
- " quantiles_device = self.quantiles.to(lambdas.device)\n",
- " quants = torch.quantile(input=samples, q=quantiles_device, dim=1)\n",
- " quants = quants.permute((1,0)) # Q, B*H\n",
- "\n",
- " # Final reshapes\n",
- " samples = samples.view(B, H, num_samples)\n",
- " sample_mean = sample_mean.view(B, H, 1)\n",
- " quants = quants.view(B, H, Q)\n",
+ " quantiles_device = self.quantiles.to(distr_args[0].device)\n",
+ " quants = torch.quantile(input=samples, \n",
+ " q=quantiles_device, \n",
+ " dim=-1)\n",
+ " quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]\n",
"\n",
" return samples, sample_mean, quants\n",
" \n",
- " def neglog_likelihood(self,\n",
- " y: torch.Tensor,\n",
- " distr_args: Tuple[torch.Tensor],\n",
- " mask: Union[torch.Tensor, None] = None,):\n",
- " if mask is None: \n",
- " mask = (y > 0) * 1\n",
- " else:\n",
- " mask = mask * ((y > 0) * 1)\n",
+ " def update_quantile(self, q: Optional[List[float]] = None):\n",
+ " if q is not None:\n",
+ " self.quantiles = nn.Parameter(torch.tensor(q, dtype=torch.float32), requires_grad=False)\n",
+ " self.output_names = [\"\"] + [f\"_ql{q_i}\" for q_i in q] + self.return_params * self.param_names\n",
+ " self.has_predicted = True\n",
+ " elif q is None and self.has_predicted:\n",
+ " self.quantiles = nn.Parameter(torch.tensor([0.5], dtype=torch.float32), requires_grad=False) \n",
+ " self.output_names = [\"\", \"-median\"] + self.return_params * self.param_names\n",
"\n",
- " eps = 1e-10\n",
- " lambdas = distr_args[0]\n",
- " B, H, K = lambdas.size()\n",
+ " def __call__(self,\n",
+ " y: torch.Tensor,\n",
+ " distr_args: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None):\n",
+ " \"\"\"\n",
+ " Computes the negative log-likelihood objective function. \n",
+ " To estimate the following predictive distribution:\n",
"\n",
- " weights = (1/K) * torch.ones_like(lambdas, device=lambdas.device)\n",
+ " $$\\mathrm{P}(\\mathbf{y}_{\\\\tau}\\,|\\,\\\\theta) \\\\quad \\mathrm{and} \\\\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\\\tau}\\,|\\,\\\\theta))$$\n",
"\n",
- " y = y[:,:,None]\n",
- " mask = mask[:,:,None]\n",
+ " where $\\\\theta$ represents the distribution's parameters. It additionally \n",
+ " summarizes the objective signal as a weighted average, with the `mask` tensor as weights. \n",
"\n",
- " y = y * mask # Protect y negative entries\n",
- " \n",
- " # Single Poisson likelihood\n",
- " log_pi = y.xlogy(lambdas + eps) - lambdas - (y + 1).lgamma()\n",
+ " **Parameters**
\n",
+ " `y`: tensor, Actual values.
\n",
+ " `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
+ " `mask`: tensor, Specifies date stamps per serie to consider in loss.
\n",
"\n",
+ " **Returns**
\n",
+ " `loss`: scalar, weighted loss function against which backpropagation will be performed.
\n",
+ " \"\"\"\n",
+ " # Instantiate Scaled Decoupled Distribution\n",
+ " distr = self.get_distribution(distr_args=distr_args)\n",
+ " x = distr._pad(y)\n",
+ " log_prob_x = distr.component_distribution.log_prob(x)\n",
+ " log_mix_prob = torch.log_softmax(distr.mixture_distribution.logits, dim=-1)\n",
" if self.batch_correlation:\n",
- " log_pi = torch.sum(log_pi, dim=0, keepdim=True)\n",
- "\n",
+ " log_prob_x = torch.sum(log_prob_x, dim=0, keepdim=True)\n",
" if self.horizon_correlation:\n",
- " log_pi = torch.sum(log_pi, dim=1, keepdim=True)\n",
- "\n",
- " # Numerically Stable Mixture loglikelihood\n",
- " loglik = torch.logsumexp((torch.log(weights) + log_pi), dim=2, keepdim=True)\n",
- " loglik = loglik * mask\n",
- "\n",
- " mean = torch.sum(weights * lambdas, axis=-1, keepdims=True)\n",
- " reglrz = torch.mean(torch.square(y - mean) * mask)\n",
- " loss = -torch.mean(loglik) + 0.001 * reglrz\n",
- " return loss\n",
- "\n",
- " def __call__(self, y: torch.Tensor,\n",
- " distr_args: Tuple[torch.Tensor],\n",
- " mask: Union[torch.Tensor, None] = None):\n",
- "\n",
- " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)\n"
+ " log_prob_x = torch.sum(log_prob_x, dim=1, keepdim=True)\n",
+ " \n",
+ " loss_values = -torch.logsumexp(log_prob_x + log_mix_prob, dim=-1) \n",
+ " \n",
+ " return weighted_average(loss_values, weights=mask)\n"
]
},
{
@@ -2967,30 +2975,31 @@
"outputs": [],
"source": [
"#| hide\n",
- "# Create single mixture and broadcast to N,H,K\n",
- "weights = torch.ones((1,3))[None, :, :]\n",
- "lambdas = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :]\n",
+ "# Create single mixture and broadcast to N,H,1,K\n",
+ "weights = torch.ones((1,3))[None, :, :].unsqueeze(2)\n",
+ "lambdas = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :].unsqueeze(2)\n",
"\n",
"# Create repetitions for the batch dimension N.\n",
"N=2\n",
"weights = torch.repeat_interleave(input=weights, repeats=N, dim=0)\n",
"lambdas = torch.repeat_interleave(input=lambdas, repeats=N, dim=0)\n",
"\n",
- "print('weights.shape (N,H,K) \\t', weights.shape)\n",
- "print('lambdas.shape (N,H,K) \\t', lambdas.shape)\n",
+ "print('weights.shape (N,H,1,K) \\t', weights.shape)\n",
+ "print('lambdas.shape (N,H,1, K) \\t', lambdas.shape)\n",
"\n",
- "distr = PMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n",
- "distr_args = (lambdas,)\n",
+ "distr = PMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9], weighted=True)\n",
+ "weights = torch.ones_like(lambdas)\n",
+ "distr_args = (lambdas, weights)\n",
"samples, sample_mean, quants = distr.sample(distr_args)\n",
"\n",
- "print('samples.shape (N,H,num_samples) ', samples.shape)\n",
- "print('sample_mean.shape (N,H) ', sample_mean.shape)\n",
- "print('quants.shape (N,H,Q) \\t\\t', quants.shape)\n",
+ "print('samples.shape (N,H,1,num_samples) ', samples.shape)\n",
+ "print('sample_mean.shape (N,H,1,1) ', sample_mean.shape)\n",
+ "print('quants.shape (N,H,1,Q) \\t\\t', quants.shape)\n",
"\n",
"# Plot synthethic data\n",
"x_plot = range(quants.shape[1]) # H length\n",
- "y_plot_hat = quants[0,:,:] # Filter N,G,T -> H,Q\n",
- "samples_hat = samples[0,:,:] # Filter N,G,T -> H,num_samples\n",
+ "y_plot_hat = quants[0,:,0,:] # Filter N,G,T -> H,Q\n",
+ "samples_hat = samples[0,:,0,:] # Filter N,G,T -> H,num_samples\n",
"\n",
"# Kernel density plot for single forecast horizon \\tau = t+1\n",
"fig, ax = plt.subplots(figsize=(3.7, 2.9))\n",
@@ -3065,7 +3074,8 @@
" \"\"\"\n",
" def __init__(self, n_components=1, level=[80, 90], quantiles=None, \n",
" num_samples=1000, return_params=False,\n",
- " batch_correlation=False, horizon_correlation=False):\n",
+ " batch_correlation=False, horizon_correlation=False,\n",
+ " weighted=False):\n",
" super(GMM, self).__init__()\n",
" # Transform level to MQLoss parameters\n",
" qs, self.output_names = level_to_outputs(level)\n",
@@ -3078,25 +3088,41 @@
" self.quantiles = torch.nn.Parameter(qs, requires_grad=False)\n",
" self.num_samples = num_samples\n",
" self.batch_correlation = batch_correlation\n",
- " self.horizon_correlation = horizon_correlation \n",
+ " self.horizon_correlation = horizon_correlation \n",
+ " self.weighted = weighted \n",
"\n",
" # If True, predict_step will return Distribution's parameters\n",
" self.return_params = return_params\n",
+ "\n",
+ " mu_names = [f\"-mu-{i}\" for i in range(1, n_components + 1)]\n",
+ " std_names = [f\"-std-{i}\" for i in range(1, n_components + 1)]\n",
+ " if weighted:\n",
+ " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n",
+ " self.param_names = [\n",
+ " i for j in zip(mu_names, std_names, weight_names) for i in j\n",
+ " ]\n",
+ " else:\n",
+ " self.param_names = [i for j in zip(mu_names, std_names) for i in j]\n",
+ "\n",
" if self.return_params:\n",
- " mu_names = [f\"-mu-{i}\" for i in range(1, n_components + 1)]\n",
- " std_names = [f\"-std-{i}\" for i in range(1, n_components + 1)]\n",
- " mu_std_names = [i for j in zip(mu_names, std_names) for i in j]\n",
- " self.output_names = self.output_names + mu_std_names\n",
+ " self.output_names = self.output_names + self.param_names\n",
"\n",
" # Add first output entry for the sample_mean\n",
" self.output_names.insert(0, \"\")\n",
"\n",
- " self.outputsize_multiplier = 2 * n_components\n",
+ " self.n_outputs = 2 + weighted\n",
+ " self.n_components = n_components\n",
+ " self.outputsize_multiplier = self.n_outputs * n_components\n",
" self.is_distribution_output = True\n",
+ " self.has_predicted = False\n",
"\n",
" def domain_map(self, output: torch.Tensor):\n",
- " means, stds = torch.tensor_split(output, 2, dim=-1)\n",
- " return (means, stds)\n",
+ " output = output.reshape(output.shape[0],\n",
+ " output.shape[1],\n",
+ " -1,\n",
+ " self.outputsize_multiplier)\n",
+ " \n",
+ " return torch.tensor_split(output, self.n_outputs, dim=-1)\n",
"\n",
" def scale_decouple(self, \n",
" output,\n",
@@ -3109,27 +3135,61 @@
" variance and residual location based on anchoring `loc`, `scale`.\n",
" Also adds domain protection to the distribution parameters.\n",
" \"\"\"\n",
- " means, stds = output\n",
+ " if self.weighted:\n",
+ " means, stds, weights = output\n",
+ " weights = F.softmax(weights, dim=-1)\n",
+ " else:\n",
+ " means, stds = output\n",
+ " \n",
" stds = F.softplus(stds)\n",
" if (loc is not None) and (scale is not None):\n",
- " loc = loc.view(means.size(dim=0), 1, -1)\n",
- " scale = scale.view(means.size(dim=0), 1, -1) \n",
+ " if loc.ndim == 3:\n",
+ " loc = loc.unsqueeze(-1)\n",
+ " scale = scale.unsqueeze(-1)\n",
" means = (means * scale) + loc\n",
" stds = (stds + eps) * scale\n",
- " return (means, stds)\n",
+ " \n",
+ " if self.weighted:\n",
+ " return (means, stds, weights)\n",
+ " else:\n",
+ " return (means, stds)\n",
+ "\n",
+ " def get_distribution(self, distr_args) -> Distribution:\n",
+ " \"\"\"\n",
+ " Construct the associated PyTorch mixture Distribution, given the collection of\n",
+ " constructor arguments (component means, stds and, when `weighted`, mixture weights).\n",
"\n",
- " def sample(self, distr_args, num_samples=None):\n",
+ " **Parameters**
\n",
+ " `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
+ "\n",
+ " **Returns**
\n",
+ " `Distribution`: MixtureSameFamily distribution with Normal components.
\n",
+ " \"\"\"\n",
+ " if self.weighted:\n",
+ " means, stds, weights = distr_args\n",
+ " else:\n",
+ " means, stds = distr_args\n",
+ " weights = torch.full_like(means, fill_value=1 / self.n_components)\n",
+ " \n",
+ " mix = Categorical(weights)\n",
+ " components = Normal(loc=means, scale=stds)\n",
+ " distr = MixtureSameFamily(mixture_distribution=mix,\n",
+ " component_distribution=components) \n",
+ "\n",
+ " self.distr_mean = distr.mean\n",
+ " \n",
+ " return distr\n",
+ "\n",
+ " def sample(self,\n",
+ " distr_args: torch.Tensor,\n",
+ " num_samples: Optional[int] = None):\n",
" \"\"\"\n",
" Construct the empirical quantiles from the estimated Distribution,\n",
" sampling from it `num_samples` independently.\n",
"\n",
" **Parameters**
\n",
" `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
- " `loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n",
- " of the resulting distribution.
\n",
- " `scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n",
- " of the resulting distribution.
\n",
- " `num_samples`: int=500, number of samples for the empirical quantiles.
\n",
+ " `num_samples`: int, overwrite number of samples for the empirical quantiles.
\n",
"\n",
" **Returns**
\n",
" `samples`: tensor, shape [B,H,`num_samples`].
\n",
@@ -3137,94 +3197,65 @@
" \"\"\"\n",
" if num_samples is None:\n",
" num_samples = self.num_samples\n",
- " \n",
- " means, stds = distr_args\n",
- " B, H, K = means.size()\n",
- " Q = len(self.quantiles)\n",
- " assert means.shape == stds.shape\n",
- "\n",
- " # Sample K ~ Mult(weights)\n",
- " # shared across B, H\n",
- " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n",
- " \n",
- " weights = (1/K) * torch.ones_like(means, device=means.device)\n",
- " \n",
- " # Avoid loop, vectorize\n",
- " weights = weights.reshape(-1, K)\n",
- " means = means.flatten()\n",
- " stds = stds.flatten()\n",
- "\n",
- " # Vectorization trick to recover row_idx\n",
- " sample_idxs = torch.multinomial(input=weights, \n",
- " num_samples=num_samples,\n",
- " replacement=True)\n",
- " aux_col_idx = torch.unsqueeze(torch.arange(B * H, device=means.device),-1) * K\n",
- "\n",
- " # To device\n",
- " sample_idxs = sample_idxs.to(means.device)\n",
"\n",
- " sample_idxs = sample_idxs + aux_col_idx\n",
- " sample_idxs = sample_idxs.flatten()\n",
- "\n",
- " sample_means = means[sample_idxs]\n",
- " sample_stds = stds[sample_idxs]\n",
+ " # Instantiate Scaled Decoupled Distribution\n",
+ " distr = self.get_distribution(distr_args=distr_args)\n",
+ " samples = distr.sample(sample_shape=(num_samples,))\n",
+ " samples = samples.permute(1, 2, 3, 0) # [samples, B, H, N] -> [B, H, N, samples]\n",
"\n",
- " # Sample y ~ Normal(mu, std) independently\n",
- " samples = torch.normal(sample_means, sample_stds).to(means.device)\n",
- " samples = samples.view(B*H, num_samples)\n",
- " sample_mean = torch.mean(samples, dim=-1)\n",
+ " sample_mean = torch.mean(samples, dim=-1, keepdim=True) \n",
"\n",
" # Compute quantiles\n",
- " quantiles_device = self.quantiles.to(means.device)\n",
- " quants = torch.quantile(input=samples, q=quantiles_device, dim=1)\n",
- " quants = quants.permute((1,0)) # Q, B*H\n",
- "\n",
- " # Final reshapes\n",
- " samples = samples.view(B, H, num_samples)\n",
- " sample_mean = sample_mean.view(B, H, 1)\n",
- " quants = quants.view(B, H, Q)\n",
+ " quantiles_device = self.quantiles.to(distr_args[0].device)\n",
+ " quants = torch.quantile(input=samples, \n",
+ " q=quantiles_device, \n",
+ " dim=-1)\n",
+ " quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]\n",
"\n",
" return samples, sample_mean, quants\n",
+ " \n",
+ " def update_quantile(self, q: Optional[List[float]] = None):\n",
+ " if q is not None:\n",
+ " self.quantiles = nn.Parameter(torch.tensor(q, dtype=torch.float32), requires_grad=False)\n",
+ " self.output_names = [\"\"] + [f\"_ql{q_i}\" for q_i in q] + self.return_params * self.param_names\n",
+ " self.has_predicted = True\n",
+ " elif q is None and self.has_predicted:\n",
+ " self.quantiles = nn.Parameter(torch.tensor([0.5], dtype=torch.float32), requires_grad=False) \n",
+ " self.output_names = [\"\", \"-median\"] + self.return_params * self.param_names\n",
"\n",
- " def neglog_likelihood(self,\n",
- " y: torch.Tensor,\n",
- " distr_args: Tuple[torch.Tensor, torch.Tensor],\n",
- " mask: Union[torch.Tensor, None] = None):\n",
- "\n",
- " if mask is None: \n",
- " mask = torch.ones_like(y)\n",
- " \n",
- " means, stds = distr_args\n",
- " B, H, K = means.size()\n",
- " \n",
- " weights = (1/K) * torch.ones_like(means, device=means.device)\n",
- " \n",
- " y = y[:,:, None]\n",
- " mask = mask[:,:,None]\n",
- " \n",
- " var = stds ** 2\n",
- " log_stds = torch.log(stds)\n",
- " log_pi = - ((y - means) ** 2 / (2 * var)) - log_stds \\\n",
- " - math.log(math.sqrt(2 * math.pi))\n",
- "\n",
- " if self.batch_correlation:\n",
- " log_pi = torch.sum(log_pi, dim=0, keepdim=True)\n",
+ " def __call__(self,\n",
+ " y: torch.Tensor,\n",
+ " distr_args: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None):\n",
+ " \"\"\"\n",
+ " Computes the negative log-likelihood objective function. \n",
+ " To estimate the following predictive distribution:\n",
"\n",
- " if self.horizon_correlation: \n",
- " log_pi = torch.sum(log_pi, dim=1, keepdim=True)\n",
+ " $$\\mathrm{P}(\\mathbf{y}_{\\\\tau}\\,|\\,\\\\theta) \\\\quad \\mathrm{and} \\\\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\\\tau}\\,|\\,\\\\theta))$$\n",
"\n",
- " # Numerically Stable Mixture loglikelihood\n",
- " loglik = torch.logsumexp((torch.log(weights) + log_pi), dim=2, keepdim=True)\n",
- " loglik = loglik * mask\n",
+ " where $\\\\theta$ represents the distribution's parameters. It additionally \n",
+ " summarizes the objective signal as a weighted average using the `mask` tensor. \n",
"\n",
- " loss = -torch.mean(loglik)\n",
- " return loss\n",
- " \n",
- " def __call__(self, y: torch.Tensor,\n",
- " distr_args: Tuple[torch.Tensor, torch.Tensor],\n",
- " mask: Union[torch.Tensor, None] = None,):\n",
+ " **Parameters**
\n",
+ " `y`: tensor, Actual values.
\n",
+ " `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
+ " `mask`: tensor, Specifies date stamps per series to consider in loss.
\n",
"\n",
- " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)"
+ " **Returns**
\n",
+ " `loss`: scalar, weighted loss function against which backpropagation will be performed.
\n",
+ " \"\"\"\n",
+ " # Instantiate Scaled Decoupled Distribution\n",
+ " distr = self.get_distribution(distr_args=distr_args)\n",
+ " x = distr._pad(y)\n",
+ " log_prob_x = distr.component_distribution.log_prob(x)\n",
+ " log_mix_prob = torch.log_softmax(distr.mixture_distribution.logits, dim=-1)\n",
+ " if self.batch_correlation:\n",
+ " log_prob_x = torch.sum(log_prob_x, dim=0, keepdim=True)\n",
+ " if self.horizon_correlation:\n",
+ " log_prob_x = torch.sum(log_prob_x, dim=1, keepdim=True)\n",
+ " loss_values = -torch.logsumexp(log_prob_x + log_mix_prob, dim=-1) \n",
+ " \n",
+ " return weighted_average(loss_values, weights=mask)"
]
},
{
@@ -3298,8 +3329,8 @@
"outputs": [],
"source": [
"#| hide\n",
- "# Create single mixture and broadcast to N,H,K\n",
- "means = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :]\n",
+ "# Create single mixture and broadcast to N,H,1,K\n",
+ "means = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :].unsqueeze(2)\n",
"\n",
"# # Create repetitions for the batch dimension N.\n",
"N=2\n",
@@ -3307,22 +3338,22 @@
"weights = torch.ones_like(means)\n",
"stds = torch.ones_like(means)\n",
"\n",
- "print('weights.shape (N,H,K) \\t', weights.shape)\n",
- "print('means.shape (N,H,K) \\t', means.shape)\n",
- "print('stds.shape (N,H,K) \\t', stds.shape)\n",
+ "print('weights.shape (N,H,1,K) \\t', weights.shape)\n",
+ "print('means.shape (N,H,1,K) \\t', means.shape)\n",
+ "print('stds.shape (N,H,1,K) \\t', stds.shape)\n",
"\n",
- "distr = GMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n",
- "distr_args = (means, stds)\n",
+ "distr = GMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9], weighted=True)\n",
+ "distr_args = (means, stds, weights)\n",
"samples, sample_mean, quants = distr.sample(distr_args)\n",
"\n",
- "print('samples.shape (N,H,num_samples) ', samples.shape)\n",
- "print('sample_mean.shape (N,H) ', sample_mean.shape)\n",
- "print('quants.shape (N,H,Q) \\t\\t', quants.shape)\n",
+ "print('samples.shape (N,H,1,num_samples) ', samples.shape)\n",
+ "print('sample_mean.shape (N,H,1,1) ', sample_mean.shape)\n",
+ "print('quants.shape (N,H,1, Q) \\t\\t', quants.shape)\n",
"\n",
"# Plot synthethic data\n",
"x_plot = range(quants.shape[1]) # H length\n",
- "y_plot_hat = quants[0,:,:] # Filter N,G,T -> H,Q\n",
- "samples_hat = samples[0,:,:] # Filter N,G,T -> H,num_samples\n",
+ "y_plot_hat = quants[0,:,0,:] # Filter N,G,T -> H,Q\n",
+ "samples_hat = samples[0,:,0,:] # Filter N,G,T -> H,num_samples\n",
"\n",
"# Kernel density plot for single forecast horizon \\tau = t+1\n",
"fig, ax = plt.subplots(figsize=(3.7, 2.9))\n",
@@ -3396,7 +3427,7 @@
" Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)\n",
" \"\"\"\n",
" def __init__(self, n_components=1, level=[80, 90], quantiles=None, \n",
- " num_samples=1000, return_params=False):\n",
+ " num_samples=1000, return_params=False, weighted=False):\n",
" super(NBMM, self).__init__()\n",
" # Transform level to MQLoss parameters\n",
" qs, self.output_names = level_to_outputs(level)\n",
@@ -3408,24 +3439,40 @@
" qs = torch.Tensor(quantiles)\n",
" self.quantiles = torch.nn.Parameter(qs, requires_grad=False)\n",
" self.num_samples = num_samples\n",
+ " self.weighted = weighted \n",
"\n",
" # If True, predict_step will return Distribution's parameters\n",
" self.return_params = return_params\n",
+ "\n",
+ " total_count_names = [f\"-total_count-{i}\" for i in range(1, n_components + 1)]\n",
+ " probs_names = [f\"-probs-{i}\" for i in range(1, n_components + 1)]\n",
+ " if weighted:\n",
+ " weight_names = [f\"-weight-{i}\" for i in range(1, n_components + 1)]\n",
+ " self.param_names = [\n",
+ " i for j in zip(total_count_names, probs_names, weight_names) for i in j\n",
+ " ]\n",
+ " else:\n",
+ " self.param_names = [i for j in zip(total_count_names, probs_names) for i in j]\n",
+ "\n",
" if self.return_params:\n",
- " total_count_names = [f\"-total_count-{i}\" for i in range(1, n_components + 1)]\n",
- " probs_names = [f\"-probs-{i}\" for i in range(1, n_components + 1)]\n",
- " param_names = [i for j in zip(total_count_names, probs_names) for i in j]\n",
- " self.output_names = self.output_names + param_names\n",
+ " self.output_names = self.output_names + self.param_names\n",
"\n",
" # Add first output entry for the sample_mean\n",
" self.output_names.insert(0, \"\") \n",
"\n",
- " self.outputsize_multiplier = 2 * n_components\n",
+ " self.n_outputs = 2 + weighted\n",
+ " self.n_components = n_components\n",
+ " self.outputsize_multiplier = self.n_outputs * n_components\n",
" self.is_distribution_output = True\n",
+ " self.has_predicted = False\n",
"\n",
" def domain_map(self, output: torch.Tensor):\n",
- " mu, alpha = torch.tensor_split(output, 2, dim=-1)\n",
- " return (mu, alpha)\n",
+ " output = output.reshape(output.shape[0],\n",
+ " output.shape[1],\n",
+ " -1,\n",
+ " self.outputsize_multiplier)\n",
+ " \n",
+ " return torch.tensor_split(output, self.n_outputs, dim=-1)\n",
"\n",
" def scale_decouple(self, \n",
" output,\n",
@@ -3439,11 +3486,18 @@
" Also adds domain protection to the distribution parameters.\n",
" \"\"\"\n",
" # Efficient NBinomial parametrization\n",
- " mu, alpha = output\n",
+ " if self.weighted:\n",
+ " mu, alpha, weights = output\n",
+ " weights = F.softmax(weights, dim=-1)\n",
+ " else:\n",
+ " mu, alpha = output\n",
+ "\n",
" mu = F.softplus(mu) + 1e-8\n",
" alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts\n",
" if (loc is not None) and (scale is not None):\n",
- " loc = loc.view(mu.size(dim=0), 1, -1)\n",
+ " if loc.ndim == 3:\n",
+ " loc = loc.unsqueeze(-1)\n",
+ " scale = scale.unsqueeze(-1) \n",
" mu *= loc\n",
" alpha /= (loc + 1.)\n",
"\n",
@@ -3452,20 +3506,48 @@
" # => probs = mu / [total_count * (1 + mu * (1/total_count))]\n",
" total_count = 1.0 / alpha\n",
" probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8 \n",
- " return (total_count, probs)\n",
+ " if self.weighted:\n",
+ " return (total_count, probs, weights)\n",
+ " else:\n",
+ " return (total_count, probs)\n",
+ "\n",
+ " def get_distribution(self, distr_args) -> Distribution:\n",
+ " \"\"\"\n",
+ " Construct the associated Pytorch Distribution, given the collection of\n",
+ " constructor arguments (component parameters and, when weighted, mixture weights).\n",
+ "\n",
+ " **Parameters**
\n",
+ " `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
+ "\n",
+ " **Returns**
\n",
+ " `Distribution`: MixtureSameFamily distribution with Categorical mixture weights and NegativeBinomial components.
\n",
+ " \"\"\"\n",
+ " if self.weighted:\n",
+ " total_count, probs, weights = distr_args\n",
+ " else:\n",
+ " total_count, probs = distr_args\n",
+ " weights = torch.full_like(total_count, fill_value=1 / self.n_components)\n",
+ "\n",
+ " mix = Categorical(weights)\n",
+ " components = NegativeBinomial(total_count, probs)\n",
+ " components.support = constraints.nonnegative\n",
+ " distr = MixtureSameFamily(mixture_distribution=mix,\n",
+ " component_distribution=components) \n",
"\n",
- " def sample(self, distr_args, num_samples=None):\n",
+ " self.distr_mean = distr.mean\n",
+ " \n",
+ " return distr\n",
+ "\n",
+ " def sample(self,\n",
+ " distr_args: torch.Tensor,\n",
+ " num_samples: Optional[int] = None):\n",
" \"\"\"\n",
" Construct the empirical quantiles from the estimated Distribution,\n",
" sampling from it `num_samples` independently.\n",
"\n",
" **Parameters**
\n",
" `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
- " `loc`: Optional tensor, of the same shape as the batch_shape + event_shape\n",
- " of the resulting distribution.
\n",
- " `scale`: Optional tensor, of the same shape as the batch_shape+event_shape \n",
- " of the resulting distribution.
\n",
- " `num_samples`: int=500, number of samples for the empirical quantiles.
\n",
+ " `num_samples`: int, overwrite number of samples for the empirical quantiles.
\n",
"\n",
" **Returns**
\n",
" `samples`: tensor, shape [B,H,`num_samples`].
\n",
@@ -3473,97 +3555,59 @@
" \"\"\"\n",
" if num_samples is None:\n",
" num_samples = self.num_samples\n",
- " \n",
- " total_count, probs = distr_args\n",
- " B, H, K = total_count.size()\n",
- " Q = len(self.quantiles)\n",
- " assert total_count.shape == probs.shape\n",
- "\n",
- " # Sample K ~ Mult(weights)\n",
- " # shared across B, H\n",
- " # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)\n",
- " \n",
- " weights = (1/K) * torch.ones_like(probs, device=probs.device)\n",
- " \n",
- " # Avoid loop, vectorize\n",
- " weights = weights.reshape(-1, K)\n",
- " total_count = total_count.flatten()\n",
- " probs = probs.flatten()\n",
- "\n",
- " # Vectorization trick to recover row_idx\n",
- " sample_idxs = torch.multinomial(input=weights, \n",
- " num_samples=num_samples,\n",
- " replacement=True)\n",
- " aux_col_idx = torch.unsqueeze(torch.arange(B * H, device=probs.device),-1) * K\n",
- "\n",
- " # To device\n",
- " sample_idxs = sample_idxs.to(probs.device)\n",
- "\n",
- " sample_idxs = sample_idxs + aux_col_idx\n",
- " sample_idxs = sample_idxs.flatten()\n",
"\n",
- " sample_total_count = total_count[sample_idxs]\n",
- " sample_probs = probs[sample_idxs]\n",
+ " # Instantiate Scaled Decoupled Distribution\n",
+ " distr = self.get_distribution(distr_args=distr_args)\n",
+ " samples = distr.sample(sample_shape=(num_samples,))\n",
+ " samples = samples.permute(1, 2, 3, 0) # [samples, B, H, N] -> [B, H, N, samples]\n",
"\n",
- " # Sample y ~ NBinomial(total_count, probs) independently\n",
- " dist = NegativeBinomial(total_count=sample_total_count, \n",
- " probs=sample_probs)\n",
- " samples = dist.sample(sample_shape=(1,)).to(probs.device)[0]\n",
- " samples = samples.view(B*H, num_samples)\n",
- " sample_mean = torch.mean(samples, dim=-1)\n",
+ " sample_mean = torch.mean(samples, dim=-1, keepdim=True) \n",
"\n",
" # Compute quantiles\n",
- " quantiles_device = self.quantiles.to(probs.device)\n",
- " quants = torch.quantile(input=samples, q=quantiles_device, dim=1)\n",
- " quants = quants.permute((1,0)) # Q, B*H\n",
- "\n",
- " # Final reshapes\n",
- " samples = samples.view(B, H, num_samples)\n",
- " sample_mean = sample_mean.view(B, H, 1)\n",
- " quants = quants.view(B, H, Q)\n",
+ " quantiles_device = self.quantiles.to(distr_args[0].device)\n",
+ " quants = torch.quantile(input=samples, \n",
+ " q=quantiles_device, \n",
+ " dim=-1)\n",
+ " quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]\n",
"\n",
" return samples, sample_mean, quants\n",
"\n",
- " def neglog_likelihood(self,\n",
- " y: torch.Tensor,\n",
- " distr_args: Tuple[torch.Tensor, torch.Tensor],\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " def update_quantile(self, q: Optional[List[float]] = None):\n",
+ " if q is not None:\n",
+ " self.quantiles = nn.Parameter(torch.tensor(q, dtype=torch.float32), requires_grad=False)\n",
+ " self.output_names = [\"\"] + [f\"_ql{q_i}\" for q_i in q] + self.return_params * self.param_names\n",
+ " self.has_predicted = True\n",
+ " elif q is None and self.has_predicted:\n",
+ " self.quantiles = nn.Parameter(torch.tensor([0.5], dtype=torch.float32), requires_grad=False)\n",
+ " self.output_names = [\"\", \"-median\"] + self.return_params * self.param_names\n",
"\n",
- " if mask is None: \n",
- " mask = torch.ones_like(y)\n",
- " \n",
- " total_count, probs = distr_args\n",
- " B, H, K = total_count.size()\n",
- " \n",
- " weights = (1/K) * torch.ones_like(probs, device=probs.device)\n",
- " \n",
- " y = y[:,:, None]\n",
- " mask = mask[:,:,None]\n",
- "\n",
- " log_unnormalized_prob = (total_count * torch.log(1.-probs) + y * torch.log(probs))\n",
- " log_normalization = (-torch.lgamma(total_count + y) + torch.lgamma(1. + y) +\n",
- " torch.lgamma(total_count))\n",
- " log_normalization[total_count + y == 0.] = 0.\n",
- " log = log_unnormalized_prob - log_normalization\n",
- "\n",
- " #log = torch.sum(log, dim=0, keepdim=True) # Joint within batch/group\n",
- " #log = torch.sum(log, dim=1, keepdim=True) # Joint within horizon\n",
- "\n",
- " # Numerical stability mixture and loglik\n",
- " log_max = torch.amax(log, dim=2, keepdim=True) # [1,1,K] (collapsed joints)\n",
- " lik = weights * torch.exp(log-log_max) # Take max\n",
- " loglik = torch.log(torch.sum(lik, dim=2, keepdim=True)) + log_max # Return max\n",
- " \n",
- " loglik = loglik * mask #replace with mask\n",
+ " def __call__(self,\n",
+ " y: torch.Tensor,\n",
+ " distr_args: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None):\n",
+ " \"\"\"\n",
+ " Computes the negative log-likelihood objective function. \n",
+ " To estimate the following predictive distribution:\n",
"\n",
- " loss = -torch.mean(loglik)\n",
- " return loss\n",
- " \n",
- " def __call__(self, y: torch.Tensor,\n",
- " distr_args: Tuple[torch.Tensor, torch.Tensor],\n",
- " mask: Union[torch.Tensor, None] = None,):\n",
+ " $$\\mathrm{P}(\\mathbf{y}_{\\\\tau}\\,|\\,\\\\theta) \\\\quad \\mathrm{and} \\\\quad -\\log(\\mathrm{P}(\\mathbf{y}_{\\\\tau}\\,|\\,\\\\theta))$$\n",
+ "\n",
+ " where $\\\\theta$ represents the distribution's parameters. It additionally \n",
+ " summarizes the objective signal as a weighted average using the `mask` tensor. \n",
"\n",
- " return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)"
+ " **Parameters**
\n",
+ " `y`: tensor, Actual values.
\n",
+ " `distr_args`: Constructor arguments for the underlying Distribution type.
\n",
+ " `mask`: tensor, Specifies date stamps per series to consider in loss.
\n",
+ "\n",
+ " **Returns**
\n",
+ " `loss`: scalar, weighted loss function against which backpropagation will be performed.
\n",
+ " \"\"\"\n",
+ " # Instantiate Scaled Decoupled Distribution\n",
+ " distr = self.get_distribution(distr_args=distr_args)\n",
+ " loss_values = -distr.log_prob(y)\n",
+ " loss_weights = mask\n",
+ " \n",
+ " return weighted_average(loss_values, weights=loss_weights)"
]
},
{
@@ -3604,8 +3648,8 @@
"outputs": [],
"source": [
"#| hide\n",
- "# Create single mixture and broadcast to N,H,K\n",
- "counts = torch.Tensor([[10,20,30], [20,40,60]])[None, :, :]\n",
+ "# Create single mixture and broadcast to N,H,1,K\n",
+ "counts = torch.Tensor([[5,10,15], [10,20,30]])[None, :, :].unsqueeze(2)\n",
"\n",
"# # Create repetitions for the batch dimension N.\n",
"N=2\n",
@@ -3613,22 +3657,22 @@
"weights = torch.ones_like(counts)\n",
"probs = torch.ones_like(counts) * 0.5\n",
"\n",
- "print('weights.shape (N,H,K) \\t', weights.shape)\n",
- "print('counts.shape (N,H,K) \\t', counts.shape)\n",
- "print('probs.shape (N,H,K) \\t', probs.shape)\n",
+ "print('weights.shape (N,H,1,K) \\t', weights.shape)\n",
+ "print('counts.shape (N,H,1,K) \\t', counts.shape)\n",
+ "print('probs.shape (N,H,1,K) \\t', probs.shape)\n",
"\n",
- "model = NBMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9])\n",
- "distr_args = (counts, probs)\n",
+ "model = NBMM(quantiles=[0.1, 0.40, 0.5, 0.60, 0.9], weighted=True)\n",
+ "distr_args = (counts, probs, weights)\n",
"samples, sample_mean, quants = model.sample(distr_args, num_samples=2000)\n",
"\n",
- "print('samples.shape (N,H,num_samples) ', samples.shape)\n",
- "print('sample_mean.shape (N,H) ', sample_mean.shape)\n",
- "print('quants.shape (N,H,Q) \\t\\t', quants.shape)\n",
+ "print('samples.shape (N,H,1,num_samples) ', samples.shape)\n",
+ "print('sample_mean.shape (N,H,1,1) ', sample_mean.shape)\n",
+ "print('quants.shape (N,H,1,Q) \\t\\t', quants.shape)\n",
"\n",
"# Plot synthethic data\n",
"x_plot = range(quants.shape[1]) # H length\n",
- "y_plot_hat = quants[0,:,:] # Filter N,G,T -> H,Q\n",
- "samples_hat = samples[0,:,:] # Filter N,G,T -> H,num_samples\n",
+ "y_plot_hat = quants[0,:,0,:] # Filter N,G,T -> H,Q\n",
+ "samples_hat = samples[0,:,0,:] # Filter N,G,T -> H,num_samples\n",
"\n",
"# Kernel density plot for single forecast horizon \\tau = t+1\n",
"fig, ax = plt.subplots(figsize=(3.7, 2.9))\n",
@@ -3723,7 +3767,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -3784,7 +3830,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TukeyLoss(torch.nn.Module):\n",
+ "class TukeyLoss(BasePointLoss):\n",
" \"\"\" Tukey Loss\n",
"\n",
" The Tukey loss function, also known as Tukey's biweight function, is a \n",
@@ -3823,10 +3869,14 @@
"\n",
" def domain_map(self, y_hat: torch.Tensor):\n",
" \"\"\"\n",
- " Univariate loss operates in dimension [B,T,H]/[B,H]\n",
- " This changes the network's output from [B,H,1]->[B,H]\n",
+ " Input:\n",
+ " Univariate: [B, H, 1]\n",
+ " Multivariate: [B, H, N]\n",
+ "\n",
+ " Output: [B, H, N]\n",
" \"\"\"\n",
- " return y_hat.squeeze(-1)\n",
+ "\n",
+ " return y_hat\n",
"\n",
" def masked_mean(self, x, mask, dim):\n",
" x_nan = x.masked_fill(mask < 1, float(\"nan\"))\n",
@@ -3834,8 +3884,12 @@
" x_mean = torch.nan_to_num(x_mean, nan=0.0)\n",
" return x_mean\n",
"\n",
- " def __call__(self, y: torch.Tensor, y_hat: torch.Tensor, \n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " def __call__(self,\n",
+ " y: torch.Tensor,\n",
+ " y_hat: torch.Tensor,\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -3942,7 +3996,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -3952,6 +4008,7 @@
" **Returns:**
\n",
" `huber_qloss`: tensor (single value).\n",
" \"\"\"\n",
+ " \n",
" error = y_hat - y\n",
" zero_error = torch.zeros_like(error)\n",
" sq = torch.maximum(-error, zero_error)\n",
@@ -4051,9 +4108,18 @@
"\n",
" def domain_map(self, y_hat: torch.Tensor):\n",
" \"\"\"\n",
- " Identity domain map [B,T,H,Q]/[B,H,Q]\n",
+ " Input:\n",
+ " Univariate: [B, H, 1 * Q]\n",
+ " Multivariate: [B, H, N * Q]\n",
+ "\n",
+ " Output: [B, H, N, Q]\n",
" \"\"\"\n",
- " return y_hat\n",
+ " output = y_hat.reshape(y_hat.shape[0],\n",
+ " y_hat.shape[1],\n",
+ " -1,\n",
+ " self.outputsize_multiplier)\n",
+ "\n",
+ " return output\n",
" \n",
" def _compute_weights(self, y, mask):\n",
" \"\"\"\n",
@@ -4061,25 +4127,24 @@
" Set horizon_weight to a ones[H] tensor if not set.\n",
" If set, check that it has the same length as the horizon in x.\n",
" \"\"\"\n",
- " if mask is None:\n",
- " mask = torch.ones_like(y, device=y.device)\n",
- " else:\n",
- " mask = mask.unsqueeze(1) # Add Q dimension.\n",
"\n",
" if self.horizon_weight is None:\n",
- " self.horizon_weight = torch.ones(mask.shape[-1])\n",
+ " weights = torch.ones_like(mask)\n",
" else:\n",
- " assert mask.shape[-1] == len(self.horizon_weight), \\\n",
- " 'horizon_weight must have same length as Y'\n",
- " \n",
- " weights = self.horizon_weight.clone()\n",
- " weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)\n",
+ " assert mask.shape[1] == len(self.horizon_weight), \\\n",
+ " 'horizon_weight must have same length as Y' \n",
+ " weights = self.horizon_weight.clone()\n",
+ " weights = weights[None, :, None, None].to(mask.device)\n",
+ " weights = torch.ones_like(mask, device=mask.device) * weights\n",
+ " \n",
" return weights * mask\n",
"\n",
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -4089,25 +4154,27 @@
" **Returns:**
\n",
" `hmqloss`: tensor (single value).\n",
" \"\"\"\n",
- "\n",
- " error = y_hat - y.unsqueeze(-1)\n",
+ " y = y.unsqueeze(-1)\n",
+ " \n",
+ " if mask is not None:\n",
+ " mask = mask.unsqueeze(-1)\n",
+ " else:\n",
+ " mask = torch.ones_like(y, device=y.device)\n",
+ " \n",
+ " error = y_hat - y\n",
+ " \n",
" zero_error = torch.zeros_like(error) \n",
" sq = torch.maximum(-error, torch.zeros_like(error))\n",
" s1_q = torch.maximum(error, torch.zeros_like(error))\n",
- " losses = F.huber_loss(self.quantiles * sq, zero_error, \n",
+ " \n",
+ " quantiles = self.quantiles[None, None, None, :]\n",
+ " losses = F.huber_loss(quantiles * sq, zero_error, \n",
" reduction='none', delta=self.delta) + \\\n",
- " F.huber_loss((1 - self.quantiles) * s1_q, zero_error, \n",
+ " F.huber_loss((1 - quantiles) * s1_q, zero_error, \n",
" reduction='none', delta=self.delta)\n",
- " losses = (1/len(self.quantiles)) * losses\n",
+ " losses = (1 / len(self.quantiles)) * losses\n",
"\n",
- " if y_hat.ndim == 3: # BaseWindows\n",
- " losses = losses.swapaxes(-2,-1) # [B,H,Q] -> [B,Q,H] (needed for horizon weighting, H at the end)\n",
- " elif y_hat.ndim == 4: # BaseRecurrent\n",
- " losses = losses.swapaxes(-2,-1)\n",
- " losses = losses.swapaxes(-2,-3) # [B,seq_len,H,Q] -> [B,Q,seq_len,H] (needed for horizon weighting, H at the end)\n",
- "\n",
- " weights = self._compute_weights(y=losses, mask=mask) # Use losses for extra dim\n",
- " # NOTE: Weights do not have Q dimension.\n",
+ " weights = self._compute_weights(y=losses, mask=mask) \n",
"\n",
" return _weighted_mean(losses=losses, weights=weights)"
]
@@ -4167,7 +4234,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class Accuracy(torch.nn.Module):\n",
+ "class Accuracy(BasePointLoss):\n",
" \"\"\" Accuracy\n",
"\n",
" Computes the accuracy between categorical `y` and `y_hat`.\n",
@@ -4180,16 +4247,25 @@
" def __init__(self,):\n",
" super(Accuracy, self).__init__()\n",
" self.is_distribution_output = False\n",
+ " self.outputsize_multiplier = 1\n",
"\n",
" def domain_map(self, y_hat: torch.Tensor):\n",
" \"\"\"\n",
- " Univariate loss operates in dimension [B,T,H]/[B,H]\n",
- " This changes the network's output from [B,H,1]->[B,H]\n",
+ " Input:\n",
+ " Univariate: [B, H, 1]\n",
+ " Multivariate: [B, H, N]\n",
+ "\n",
+ " Output: [B, H, N]\n",
" \"\"\"\n",
- " return y_hat.squeeze(-1)\n",
"\n",
- " def __call__(self, y: torch.Tensor, y_hat: torch.Tensor, \n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " return y_hat\n",
+ " \n",
+ " def __call__(self,\n",
+ " y: torch.Tensor,\n",
+ " y_hat: torch.Tensor,\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -4199,10 +4275,11 @@
" **Returns:**
\n",
" `accuracy`: tensor (single value).\n",
" \"\"\"\n",
+ "\n",
" if mask is None:\n",
" mask = torch.ones_like(y_hat)\n",
"\n",
- " measure = (y.unsqueeze(-1) == y_hat) * mask.unsqueeze(-1)\n",
+ " measure = (y == y_hat) * mask\n",
" accuracy = torch.mean(measure)\n",
" return accuracy"
]
@@ -4244,7 +4321,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class sCRPS(torch.nn.Module):\n",
+ "class sCRPS(BasePointLoss):\n",
" \"\"\"Scaled Continues Ranked Probability Score\n",
"\n",
" Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),\n",
@@ -4279,8 +4356,12 @@
" self.mql = MQLoss(level=level, quantiles=quantiles)\n",
" self.is_distribution_output = False\n",
" \n",
- " def __call__(self, y: torch.Tensor, y_hat: torch.Tensor, \n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " def __call__(self,\n",
+ " y: torch.Tensor,\n",
+ " y_hat: torch.Tensor,\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:**
\n",
" `y`: tensor, Actual values.
\n",
@@ -4290,7 +4371,7 @@
" **Returns:**
\n",
" `scrps`: tensor (single value).\n",
" \"\"\"\n",
- " mql = self.mql(y=y, y_hat=y_hat, mask=mask)\n",
+ " mql = self.mql(y=y, y_hat=y_hat, mask=mask, y_insample=y_insample)\n",
" norm = torch.sum(torch.abs(y))\n",
" unmean = torch.sum(mask)\n",
" scrps = 2 * mql * unmean / (norm + 1e-5)\n",
@@ -4326,11 +4407,11 @@
"source": [
"#| hide\n",
"# Each 1 is an error, there are 6 datapoints.\n",
- "y = torch.Tensor([[0,0,0],[0,0,0]])\n",
- "y_hat = torch.Tensor([[0,0,1],[1,0,1]])\n",
+ "y = torch.Tensor([[0,0,0],[0,0,0]]).unsqueeze(-1)\n",
+ "y_hat = torch.Tensor([[0,0,1],[1,0,1]]).unsqueeze(-1)\n",
"\n",
"# Complete mask and horizon_weight\n",
- "mask = torch.Tensor([[1,1,1],[1,1,1]])\n",
+ "mask = torch.Tensor([[1,1,1],[1,1,1]]).unsqueeze(-1)\n",
"horizon_weight = torch.Tensor([1,1,1])\n",
"\n",
"mae = MAE(horizon_weight=horizon_weight)\n",
@@ -4338,21 +4419,21 @@
"assert loss==(3/6), 'Should be 3/6'\n",
"\n",
"# Incomplete mask and complete horizon_weight\n",
- "mask = torch.Tensor([[1,1,1],[0,1,1]]) # Only 1 error and points is masked.\n",
+ "mask = torch.Tensor([[1,1,1],[0,1,1]]).unsqueeze(-1) # Only 1 error and points is masked.\n",
"horizon_weight = torch.Tensor([1,1,1])\n",
"mae = MAE(horizon_weight=horizon_weight)\n",
"loss = mae(y=y, y_hat=y_hat, mask=mask)\n",
"assert loss==(2/5), 'Should be 2/5'\n",
"\n",
"# Complete mask and incomplete horizon_weight\n",
- "mask = torch.Tensor([[1,1,1],[1,1,1]])\n",
+ "mask = torch.Tensor([[1,1,1],[1,1,1]]).unsqueeze(-1)\n",
"horizon_weight = torch.Tensor([1,1,0]) # 2 errors and points are masked.\n",
"mae = MAE(horizon_weight=horizon_weight)\n",
"loss = mae(y=y, y_hat=y_hat, mask=mask)\n",
"assert loss==(1/4), 'Should be 1/4'\n",
"\n",
"# Incomplete mask and incomplete horizon_weight\n",
- "mask = torch.Tensor([[0,1,1],[1,1,1]])\n",
+ "mask = torch.Tensor([[0,1,1],[1,1,1]]).unsqueeze(-1)\n",
"horizon_weight = torch.Tensor([1,1,0]) # 2 errors are masked, and 3 points.\n",
"mae = MAE(horizon_weight=horizon_weight)\n",
"loss = mae(y=y, y_hat=y_hat, mask=mask)\n",
diff --git a/nbs/models.autoformer.ipynb b/nbs/models.autoformer.ipynb
index 9c6567f2e..999c4ca62 100644
--- a/nbs/models.autoformer.ipynb
+++ b/nbs/models.autoformer.ipynb
@@ -68,7 +68,7 @@
"import torch.nn.functional as F\n",
"\n",
"from neuralforecast.common._modules import DataEmbedding, SeriesDecomp\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -80,8 +80,12 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
+ "\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -410,7 +414,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class Autoformer(BaseWindows):\n",
+ "class Autoformer(BaseModel):\n",
" \"\"\" Autoformer\n",
"\n",
" The Autoformer model tackles the challenge of finding reliable dependencies on intricate temporal patterns of long-horizon forecasting.\n",
@@ -469,10 +473,11 @@
"\t- [Wu, Haixu, Jiehui Xu, Jianmin Wang, and Mingsheng Long. \"Autoformer: Decomposition transformers with auto-correlation for long-term series forecasting\"](https://proceedings.neurips.cc/paper/2021/hash/bcc0d400288793e8bdcd7c19a8ac0c2b-Abstract.html)<br>\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -616,13 +621,9 @@
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
" insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
" futr_exog = windows_batch['futr_exog']\n",
"\n",
" # Parse inputs\n",
- " insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
" if self.futr_exog_size > 0:\n",
" x_mark_enc = futr_exog[:,:self.input_size,:]\n",
" x_mark_dec = futr_exog[:,-(self.label_len+self.h):,:]\n",
@@ -650,7 +651,8 @@
" # final\n",
" dec_out = trend_part + seasonal_part\n",
"\n",
- " forecast = self.loss.domain_map(dec_out[:, -self.h:])\n",
+ " forecast = dec_out[:, -self.h:]\n",
+ " \n",
" return forecast"
]
},
@@ -681,6 +683,21 @@
"show_doc(Autoformer.predict, name='Autoformer.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(Autoformer, [\"airpassengers\"])"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
diff --git a/nbs/models.bitcn.ipynb b/nbs/models.bitcn.ipynb
index cd78bb194..b7363dba4 100644
--- a/nbs/models.bitcn.ipynb
+++ b/nbs/models.bitcn.ipynb
@@ -55,8 +55,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -74,7 +77,7 @@
"import numpy as np\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_windows import BaseWindows"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -146,7 +149,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class BiTCN(BaseWindows):\n",
+ "class BiTCN(BaseModel):\n",
" \"\"\" BiTCN\n",
"\n",
" Bidirectional Temporal Convolutional Network (BiTCN) is a forecasting architecture based on two temporal convolutional networks (TCNs). The first network ('forward') encodes future covariates of the time series, whereas the second network ('backward') encodes past observations and covariates. This is a univariate model.\n",
@@ -170,7 +173,7 @@
" `batch_size`: int=32, number of different series in each batch.<br>\n",
" `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.<br>\n",
" `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
- " `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.<br>\n",
+ " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.<br>\n",
" `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
" `step_size`: int=1, step size between each window of temporal data.<br>\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
@@ -190,10 +193,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int,\n",
@@ -315,7 +319,7 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
- " x = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n",
+ " x = windows_batch['insample_y'].contiguous() # [B, L, 1]\n",
" hist_exog = windows_batch['hist_exog'] # [B, L, X]\n",
" futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n",
" stat_exog = windows_batch['stat_exog'] # [B, S]\n",
@@ -358,11 +362,8 @@
"\n",
" # Output layer to create forecasts\n",
" x = x.permute(0, 2, 1) # [B, 3 * hidden_size, h] -> [B, h, 3 * hidden_size]\n",
- " x = self.output_lin(x) # [B, h, 3 * hidden_size] -> [B, h, n_outputs] \n",
+ " forecast = self.output_lin(x) # [B, h, 3 * hidden_size] -> [B, h, n_outputs] \n",
"\n",
- " # Map to output domain\n",
- " forecast = self.loss.domain_map(x)\n",
- " \n",
" return forecast"
]
},
@@ -393,6 +394,21 @@
"show_doc(BiTCN.predict, name='BiTCN.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(BiTCN, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -411,8 +427,8 @@
"import matplotlib.pyplot as plt\n",
"\n",
"from neuralforecast import NeuralForecast\n",
- "from neuralforecast.models import BiTCN\n",
"from neuralforecast.losses.pytorch import GMM\n",
+ "from neuralforecast.models import BiTCN\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds\n",
@@ -196,10 +195,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False\n",
+ " RECURRENT = True\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -215,7 +215,7 @@
" stat_exog_list = None,\n",
" exclude_insample_y = False,\n",
" loss = DistributionLoss(distribution='StudentT', level=[80, 90], return_params=False),\n",
- " valid_loss = MQLoss(level=[80, 90]),\n",
+ " valid_loss = MAE(),\n",
" max_steps: int = 1000,\n",
" learning_rate: float = 1e-3,\n",
" num_lr_decays: int = 3,\n",
@@ -241,15 +241,6 @@
" if exclude_insample_y:\n",
" raise Exception('DeepAR has no possibility for excluding y.')\n",
" \n",
- " if not loss.is_distribution_output:\n",
- " raise Exception('DeepAR only supports distributional outputs.')\n",
- " \n",
- " if str(type(valid_loss)) not in [\"<class 'neuralforecast.losses.pytorch.MQLoss'>\"]:\n",
- " raise Exception('DeepAR only supports MQLoss as validation loss.')\n",
- "\n",
- " if loss.return_params:\n",
- " raise Exception('DeepAR does not return distribution parameters due to Monte Carlo sampling.')\n",
- " \n",
" # Inherit BaseWindows class\n",
" super(DeepAR, self).__init__(h=h,\n",
" input_size=input_size,\n",
@@ -281,8 +272,7 @@
" dataloader_kwargs=dataloader_kwargs,\n",
" **trainer_kwargs)\n",
"\n",
- " self.horizon_backup = self.h # Used because h=0 during training\n",
- " self.trajectory_samples = trajectory_samples\n",
+ " self.n_samples = trajectory_samples\n",
"\n",
" # LSTM\n",
" self.encoder_n_layers = lstm_n_layers\n",
@@ -293,6 +283,8 @@
" input_encoder = 1 + self.futr_exog_size + self.stat_exog_size\n",
"\n",
" # Instantiate model\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = False\n",
" self.hist_encoder = nn.LSTM(input_size=input_encoder,\n",
" hidden_size=self.encoder_hidden_size,\n",
" num_layers=self.encoder_n_layers,\n",
@@ -305,268 +297,38 @@
" hidden_size=decoder_hidden_size,\n",
" hidden_layers=decoder_hidden_layers)\n",
"\n",
- " # Override BaseWindows method\n",
- " def training_step(self, batch, batch_idx):\n",
- "\n",
- " # During training h=0 \n",
- " self.h = 0\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Create and normalize windows [Ws, L, C]\n",
- " windows = self._create_windows(batch, step='train')\n",
- " original_insample_y = windows['temporal'][:, :, y_idx].clone() # windows: [B, L, Feature] -> [B, L]\n",
- " original_insample_y = original_insample_y[:,1:] # Remove first (shift in DeepAr, cell at t outputs t+1)\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, _, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L+H]\n",
- " hist_exog=None, # None\n",
- " stat_exog=stat_exog,\n",
- " y_idx=y_idx) # [Ws, 1]\n",
- "\n",
- " # Model Predictions\n",
- " output = self.train_forward(windows_batch)\n",
- "\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=original_insample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " outsample_y = original_insample_y\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " mask = insample_mask[:,1:].clone() # Remove first (shift in DeepAr, cell at t outputs t+1)\n",
- " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=mask)\n",
- " else:\n",
- " raise Exception('DeepAR only supports distributional outputs.')\n",
- "\n",
- " if torch.isnan(loss):\n",
- " print('Model Parameters', self.hparams)\n",
- " print('insample_y', torch.isnan(insample_y).sum())\n",
- " print('outsample_y', torch.isnan(outsample_y).sum())\n",
- " print('output', torch.isnan(output).sum())\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'train_loss',\n",
- " loss.item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.train_trajectories.append((self.global_step, loss.item()))\n",
- "\n",
- " self.h = self.horizon_backup # Restore horizon\n",
- " return loss\n",
- "\n",
- " def validation_step(self, batch, batch_idx):\n",
- "\n",
- " self.h == self.horizon_backup\n",
- "\n",
- " if self.val_size == 0:\n",
- " return np.nan\n",
- "\n",
- " # TODO: Hack to compute number of windows\n",
- " windows = self._create_windows(batch, step='val')\n",
- " n_windows = len(windows['temporal'])\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Number of windows in batch\n",
- " windows_batch_size = self.inference_windows_batch_size\n",
- " if windows_batch_size < 0:\n",
- " windows_batch_size = n_windows\n",
- " n_batches = int(np.ceil(n_windows/windows_batch_size))\n",
- "\n",
- " valid_losses = []\n",
- " batch_sizes = []\n",
- " for i in range(n_batches):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " w_idxs = np.arange(i*windows_batch_size, \n",
- " min((i+1)*windows_batch_size, n_windows))\n",
- " windows = self._create_windows(batch, step='val', w_idxs=w_idxs)\n",
- " original_outsample_y = torch.clone(windows['temporal'][:,-self.h:,0])\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, outsample_mask, \\\n",
- " _, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- " windows_batch = dict(insample_y=insample_y,\n",
- " insample_mask=insample_mask,\n",
- " futr_exog=futr_exog,\n",
- " hist_exog=None,\n",
- " stat_exog=stat_exog,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx) \n",
- " \n",
- " # Model Predictions\n",
- " output_batch = self(windows_batch)\n",
- " # Monte Carlo already returns y_hat with mean and quantiles\n",
- " output_batch = output_batch[:,:, 1:] # Remove mean\n",
- " valid_loss_batch = self.valid_loss(y=original_outsample_y, y_hat=output_batch, mask=outsample_mask)\n",
- " valid_losses.append(valid_loss_batch)\n",
- " batch_sizes.append(len(output_batch))\n",
- "\n",
- " valid_loss = torch.stack(valid_losses)\n",
- " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n",
- " batch_size = torch.sum(batch_sizes)\n",
- " valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size\n",
- "\n",
- " if torch.isnan(valid_loss):\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'valid_loss',\n",
- " valid_loss.item(),\n",
- " batch_size=batch_size,\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.validation_step_outputs.append(valid_loss)\n",
- " return valid_loss\n",
- "\n",
- " def predict_step(self, batch, batch_idx):\n",
- "\n",
- " self.h == self.horizon_backup\n",
- "\n",
- " # TODO: Hack to compute number of windows\n",
- " windows = self._create_windows(batch, step='predict')\n",
- " n_windows = len(windows['temporal'])\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Number of windows in batch\n",
- " windows_batch_size = self.inference_windows_batch_size\n",
- " if windows_batch_size < 0:\n",
- " windows_batch_size = n_windows\n",
- " n_batches = int(np.ceil(n_windows/windows_batch_size))\n",
- "\n",
- " y_hats = []\n",
- " for i in range(n_batches):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " w_idxs = np.arange(i*windows_batch_size, \n",
- " min((i+1)*windows_batch_size, n_windows))\n",
- " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, _, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L+H]\n",
- " stat_exog=stat_exog,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " \n",
- " # Model Predictions\n",
- " y_hat = self(windows_batch)\n",
- " # Monte Carlo already returns y_hat with mean and quantiles\n",
- " y_hats.append(y_hat)\n",
- " y_hat = torch.cat(y_hats, dim=0)\n",
- " return y_hat\n",
- "\n",
- " def train_forward(self, windows_batch):\n",
+ " def forward(self, windows_batch):\n",
"\n",
" # Parse windows_batch\n",
- " encoder_input = windows_batch['insample_y'][:,:, None] # <- [B,T,1]\n",
+ " encoder_input = windows_batch['insample_y'] # <- [B, T, 1]\n",
" futr_exog = windows_batch['futr_exog']\n",
" stat_exog = windows_batch['stat_exog']\n",
"\n",
- " #[B, input_size-1, X]\n",
- " encoder_input = encoder_input[:,:-1,:] # Remove last (shift in DeepAr, cell at t outputs t+1)\n",
" _, input_size = encoder_input.shape[:2]\n",
" if self.futr_exog_size > 0:\n",
- " # Shift futr_exog (t predicts t+1, last output is outside insample_y)\n",
- " encoder_input = torch.cat((encoder_input, futr_exog[:,1:,:]), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, futr_exog), dim=2)\n",
+ "\n",
" if self.stat_exog_size > 0:\n",
- " stat_exog = stat_exog.unsqueeze(1).repeat(1, input_size, 1) # [B, S] -> [B, input_size-1, S]\n",
+ " stat_exog = stat_exog.unsqueeze(1).repeat(1, input_size, 1) # [B, S] -> [B, input_size, S]\n",
" encoder_input = torch.cat((encoder_input, stat_exog), dim=2)\n",
"\n",
" # RNN forward\n",
- " hidden_state, _ = self.hist_encoder(encoder_input) # [B, input_size-1, rnn_hidden_state]\n",
+ " if self.maintain_state:\n",
+ " rnn_state = self.rnn_state\n",
+ " else:\n",
+ " rnn_state = None\n",
"\n",
- " # Decoder forward\n",
- " output = self.decoder(hidden_state) # [B, input_size-1, output_size]\n",
- " output = self.loss.domain_map(output)\n",
- " return output\n",
- " \n",
- " def forward(self, windows_batch):\n",
+ " hidden_state, rnn_state = self.hist_encoder(encoder_input, \n",
+ " rnn_state) # [B, input_size, rnn_hidden_state]\n",
"\n",
- " # Parse windows_batch\n",
- " encoder_input = windows_batch['insample_y'][:,:, None] # <- [B,L,1]\n",
- " futr_exog = windows_batch['futr_exog'] # <- [B,L+H, n_f]\n",
- " stat_exog = windows_batch['stat_exog']\n",
- " y_idx = windows_batch['y_idx']\n",
+ " if self.maintain_state:\n",
+ " self.rnn_state = rnn_state\n",
"\n",
- " #[B, seq_len, X]\n",
- " batch_size, input_size = encoder_input.shape[:2]\n",
- " if self.futr_exog_size > 0:\n",
- " futr_exog_input_window = futr_exog[:,1:input_size+1,:] # Align y_t with futr_exog_t+1\n",
- " encoder_input = torch.cat((encoder_input, futr_exog_input_window), dim=2)\n",
- " if self.stat_exog_size > 0:\n",
- " stat_exog_input_window = stat_exog.unsqueeze(1).repeat(1, input_size, 1) # [B, S] -> [B, input_size, S]\n",
- " encoder_input = torch.cat((encoder_input, stat_exog_input_window), dim=2)\n",
- "\n",
- " # Use input_size history to predict first h of the forecasting window\n",
- " _, h_c_tuple = self.hist_encoder(encoder_input)\n",
- " h_n = h_c_tuple[0] # [n_layers, B, lstm_hidden_state]\n",
- " c_n = h_c_tuple[1] # [n_layers, B, lstm_hidden_state]\n",
- "\n",
- " # Vectorizes trajectory samples in batch dimension [1]\n",
- " h_n = torch.repeat_interleave(h_n, self.trajectory_samples, 1) # [n_layers, B*trajectory_samples, rnn_hidden_state]\n",
- " c_n = torch.repeat_interleave(c_n, self.trajectory_samples, 1) # [n_layers, B*trajectory_samples, rnn_hidden_state]\n",
- "\n",
- " # Scales for inverse normalization\n",
- " y_scale = self.scaler.x_scale[:, 0, [y_idx]].squeeze(-1).to(encoder_input.device)\n",
- " y_loc = self.scaler.x_shift[:, 0, [y_idx]].squeeze(-1).to(encoder_input.device)\n",
- " y_scale = torch.repeat_interleave(y_scale, self.trajectory_samples, 0)\n",
- " y_loc = torch.repeat_interleave(y_loc, self.trajectory_samples, 0)\n",
- "\n",
- " # Recursive strategy prediction\n",
- " quantiles = self.loss.quantiles.to(encoder_input.device)\n",
- " y_hat = torch.zeros(batch_size, self.h, len(quantiles)+1, device=encoder_input.device)\n",
- " for tau in range(self.h):\n",
- " # Decoder forward\n",
- " last_layer_h = h_n[-1] # [B*trajectory_samples, lstm_hidden_state]\n",
- " output = self.decoder(last_layer_h) \n",
- " output = self.loss.domain_map(output)\n",
- "\n",
- " # Inverse normalization\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " # Add horizon (1) dimension\n",
- " distr_args = list(distr_args)\n",
- " for i in range(len(distr_args)):\n",
- " distr_args[i] = distr_args[i].unsqueeze(-1)\n",
- " distr_args = tuple(distr_args)\n",
- " samples_tau, _, _ = self.loss.sample(distr_args=distr_args, num_samples=1)\n",
- " samples_tau = samples_tau.reshape(batch_size, self.trajectory_samples)\n",
- " sample_mean = torch.mean(samples_tau, dim=-1).to(encoder_input.device)\n",
- " quants = torch.quantile(input=samples_tau, \n",
- " q=quantiles, dim=-1).to(encoder_input.device)\n",
- " y_hat[:,tau,0] = sample_mean\n",
- " y_hat[:,tau,1:] = quants.permute((1,0)) # [Q, B] -> [B, Q]\n",
- " \n",
- " # Stop if already in the last step (no need to predict next step)\n",
- " if tau+1 == self.h:\n",
- " continue\n",
- " # Normalize to use as input\n",
- " encoder_input = self.scaler.scaler(samples_tau.flatten(), y_loc, y_scale) # [B*n_samples]\n",
- " encoder_input = encoder_input[:, None, None] # [B*n_samples, 1, 1]\n",
- "\n",
- " # Update input\n",
- " if self.futr_exog_size > 0:\n",
- " futr_exog_tau = futr_exog[:,[input_size+tau+1],:] # [B, 1, n_f]\n",
- " futr_exog_tau = torch.repeat_interleave(futr_exog_tau, self.trajectory_samples, 0) # [B*n_samples, 1, n_f]\n",
- " encoder_input = torch.cat((encoder_input, futr_exog_tau), dim=2) # [B*n_samples, 1, 1+n_f]\n",
- " if self.stat_exog_size > 0:\n",
- " stat_exog_tau = torch.repeat_interleave(stat_exog, self.trajectory_samples, 0) # [B*n_samples, n_s]\n",
- " encoder_input = torch.cat((encoder_input, stat_exog_tau[:,None,:]), dim=2) # [B*n_samples, 1, 1+n_f+n_s]\n",
- " \n",
- " _, h_c_tuple = self.hist_encoder(encoder_input, (h_n, c_n))\n",
- " h_n = h_c_tuple[0] # [n_layers, B, rnn_hidden_state]\n",
- " c_n = h_c_tuple[1] # [n_layers, B, rnn_hidden_state]\n",
- "\n",
- " return y_hat"
+ " # Decoder forward\n",
+ " output = self.decoder(hidden_state) # [B, input_size, output_size]\n",
+ "\n",
+ " # Return only horizon part\n",
+ " return output[:, -self.h:]"
]
},
{
@@ -596,6 +358,21 @@
"show_doc(DeepAR.predict, name='DeepAR.predict', title_level=3)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(DeepAR, [\"airpassengers\"])"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
@@ -616,18 +393,18 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import DeepAR\n",
- "from neuralforecast.losses.pytorch import DistributionLoss\n",
+ "from neuralforecast.losses.pytorch import DistributionLoss, MQLoss\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "\n",
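"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",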
"\n",
"nf = NeuralForecast(\n",
" models=[DeepAR(h=12,\n",
- " input_size=48,\n",
- " lstm_n_layers=3,\n",
+ " input_size=24,\n",
+ " lstm_n_layers=1,\n",
" trajectory_samples=100,\n",
- " loss=DistributionLoss(distribution='Normal', level=[80, 90], return_params=False),\n",
+ " loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=True),\n",
+ " valid_loss=MQLoss(level=[80, 90]),\n",
" learning_rate=0.005,\n",
" stat_exog_list=['airline1'],\n",
" futr_exog_list=['trend'],\n",
@@ -635,7 +412,8 @@
" val_check_steps=10,\n",
" early_stop_patience_steps=-1,\n",
" scaler_type='standard',\n",
- " enable_progress_bar=True),\n",
+ " enable_progress_bar=True,\n",
+ " ),\n",
" ],\n",
" freq='M'\n",
")\n",
diff --git a/nbs/models.deepnpts.ipynb b/nbs/models.deepnpts.ipynb
index 4f5e7ee9f..465dde397 100644
--- a/nbs/models.deepnpts.ipynb
+++ b/nbs/models.deepnpts.ipynb
@@ -51,7 +51,7 @@
"from typing import Optional\n",
"\n",
"\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.losses.pytorch import MAE\n"
]
},
@@ -66,7 +66,8 @@
"import warnings\n",
"\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -77,6 +78,7 @@
"source": [
"#| hide\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
"warnings.filterwarnings(\"ignore\")"
]
},
@@ -87,7 +89,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class DeepNPTS(BaseWindows):\n",
+ "class DeepNPTS(BaseModel):\n",
" \"\"\" DeepNPTS\n",
"\n",
" Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.\n",
@@ -133,10 +135,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
" \n",
" def __init__(self,\n",
" h,\n",
@@ -176,10 +179,10 @@
" if exclude_insample_y:\n",
" raise Exception('DeepNPTS has no possibility for excluding y.')\n",
"\n",
- " if not isinstance(loss, losses.BasePointLoss):\n",
+ " if loss.outputsize_multiplier > 1:\n",
" raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function.') \n",
" \n",
- " if not isinstance(valid_loss, losses.BasePointLoss):\n",
+ " if valid_loss is not None and not isinstance(valid_loss, losses.BasePointLoss):\n",
" raise Exception('DeepNPTS only supports point loss functions (MAE, MSE, etc) as valid loss function.') \n",
" \n",
" # Inherit BaseWindows class\n",
@@ -234,13 +237,13 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
- " x = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n",
+ " x = windows_batch['insample_y'] # [B, L, 1]\n",
" hist_exog = windows_batch['hist_exog'] # [B, L, X]\n",
" futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n",
" stat_exog = windows_batch['stat_exog'] # [B, S]\n",
"\n",
" batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len\n",
- " insample_y = windows_batch['insample_y'].unsqueeze(-1) \n",
+ " insample_y = windows_batch['insample_y'] \n",
" \n",
" # Concatenate x_t with future exogenous of input\n",
" if self.futr_exog_size > 0: \n",
@@ -268,9 +271,7 @@
" # Apply softmax for weighted input predictions\n",
" weights = weights.reshape(batch_size, seq_len, -1) # [B, L * h] -> [B, L, h]\n",
" x = F.softmax(weights, dim=1) * insample_y # [B, L, h] * [B, L, 1] = [B, L, h]\n",
- " output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]\n",
- "\n",
- " forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1]\n",
+ " forecast = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]\n",
"\n",
" return forecast"
]
@@ -302,6 +303,15 @@
"show_doc(DeepNPTS.predict, name='DeepNPTS.predict', title_level=3)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "check_model(DeepNPTS, [\"airpassengers\"])"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
diff --git a/nbs/models.dilated_rnn.ipynb b/nbs/models.dilated_rnn.ipynb
index 4b3bd374f..b18c5449f 100644
--- a/nbs/models.dilated_rnn.ipynb
+++ b/nbs/models.dilated_rnn.ipynb
@@ -13,7 +13,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The autoreload extension is already loaded. To reload it, use:\n",
+ " %reload_ext autoreload\n"
+ ]
+ }
+ ],
"source": [
"#| hide\n",
"%load_ext autoreload\n",
@@ -58,8 +67,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from nbdev.showdoc import show_doc\n",
- "from neuralforecast.utils import generate_series"
+ "from neuralforecast.utils import generate_series\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -75,7 +87,7 @@
"import torch.nn as nn\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_recurrent import BaseRecurrent\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import MLP"
]
},
@@ -324,8 +336,8 @@
"\n",
" blocks = [dilated_outputs[:, i * batchsize: (i + 1) * batchsize, :] for i in range(rate)]\n",
"\n",
- " interleaved = torch.stack((blocks)).transpose(1, 0).contiguous()\n",
- " interleaved = interleaved.view(dilated_outputs.size(0) * rate,\n",
+ " interleaved = torch.stack((blocks)).transpose(1, 0)\n",
+ " interleaved = interleaved.reshape(dilated_outputs.size(0) * rate,\n",
" batchsize,\n",
" dilated_outputs.size(2))\n",
" return interleaved\n",
@@ -359,7 +371,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class DilatedRNN(BaseRecurrent):\n",
+ "class DilatedRNN(BaseModel):\n",
" \"\"\" DilatedRNN\n",
"\n",
" **Parameters:**<br>\n",
@@ -398,24 +410,26 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'recurrent'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
- " EXOGENOUS_STAT = True \n",
+ " EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int,\n",
- " input_size: int = -1,\n",
+ " input_size: int,\n",
" inference_input_size: int = -1,\n",
" cell_type: str = 'LSTM',\n",
" dilations: List[List[int]] = [[1, 2], [4, 8]],\n",
- " encoder_hidden_size: int = 200,\n",
+ " encoder_hidden_size: int = 128,\n",
" context_size: int = 10,\n",
- " decoder_hidden_size: int = 200,\n",
+ " decoder_hidden_size: int = 128,\n",
" decoder_layers: int = 2,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" loss = MAE(),\n",
" valid_loss = None,\n",
" max_steps: int = 1000,\n",
@@ -425,6 +439,9 @@
" val_check_steps: int = 100,\n",
" batch_size = 32,\n",
" valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 128,\n",
+ " inference_windows_batch_size = 1024,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'robust',\n",
" random_seed: int = 1,\n",
@@ -439,7 +456,10 @@
" super(DilatedRNN, self).__init__(\n",
" h=h,\n",
" input_size=input_size,\n",
- " inference_input_size=inference_input_size,\n",
+ " futr_exog_list=futr_exog_list,\n",
+ " hist_exog_list=hist_exog_list,\n",
+ " stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -449,13 +469,14 @@
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
" valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
+ " step_size=step_size,\n",
" scaler_type=scaler_type,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
+ " random_seed=random_seed,\n",
" num_workers_loader=num_workers_loader,\n",
" drop_last_loader=drop_last_loader,\n",
- " random_seed=random_seed,\n",
" optimizer=optimizer,\n",
" optimizer_kwargs=optimizer_kwargs,\n",
" lr_scheduler=lr_scheduler,\n",
@@ -477,14 +498,12 @@
" self.decoder_layers = decoder_layers\n",
"\n",
" # RNN input size (1 for target variable y)\n",
- " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size\n",
+ " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size\n",
"\n",
" # Instantiate model\n",
" layers = []\n",
" for grp_num in range(len(self.dilations)):\n",
- " if grp_num == 0:\n",
- " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size\n",
- " else:\n",
+ " if grp_num > 0:\n",
" input_encoder = self.encoder_hidden_size\n",
" layer = DRNN(input_encoder,\n",
" self.encoder_hidden_size,\n",
@@ -496,11 +515,11 @@
" self.rnn_stack = nn.Sequential(*layers)\n",
"\n",
" # Context adapter\n",
- " self.context_adapter = nn.Linear(in_features=self.encoder_hidden_size + self.futr_exog_size * h,\n",
- " out_features=self.context_size * h)\n",
+ " self.context_adapter = nn.Linear(in_features=self.input_size,\n",
+ " out_features=h)\n",
"\n",
" # Decoder MLP\n",
- " self.mlp_decoder = MLP(in_features=self.context_size + self.futr_exog_size,\n",
+ " self.mlp_decoder = MLP(in_features=self.encoder_hidden_size + self.futr_exog_size,\n",
" out_features=self.loss.outputsize_multiplier,\n",
" hidden_size=self.decoder_hidden_size,\n",
" num_layers=self.decoder_layers,\n",
@@ -510,22 +529,23 @@
" def forward(self, windows_batch):\n",
" \n",
" # Parse windows_batch\n",
- " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
- " futr_exog = windows_batch['futr_exog']\n",
- " hist_exog = windows_batch['hist_exog']\n",
- " stat_exog = windows_batch['stat_exog']\n",
- "\n",
- " # Concatenate y, historic and static inputs\n",
- " # [B, C, seq_len, 1] -> [B, seq_len, C]\n",
- " # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]\n",
+ " encoder_input = windows_batch['insample_y'] # [B, L, 1]\n",
+ " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n",
+ " hist_exog = windows_batch['hist_exog'] # [B, L, X]\n",
+ " stat_exog = windows_batch['stat_exog'] # [B, S]\n",
+ "\n",
+ " # Concatenate y, historic and static inputs \n",
" batch_size, seq_len = encoder_input.shape[:2]\n",
" if self.hist_exog_size > 0:\n",
- " hist_exog = hist_exog.permute(0,2,1,3).squeeze(-1) # [B, X, seq_len, 1] -> [B, seq_len, X]\n",
- " encoder_input = torch.cat((encoder_input, hist_exog), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, hist_exog), dim=2) # [B, L, 1] + [B, L, X] -> [B, L, 1 + X]\n",
"\n",
" if self.stat_exog_size > 0:\n",
- " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
- " encoder_input = torch.cat((encoder_input, stat_exog), dim=2)\n",
+ " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, L, S]\n",
+ " encoder_input = torch.cat((encoder_input, stat_exog), dim=2) # [B, L, 1 + X] + [B, L, S] -> [B, L, 1 + X + S]\n",
+ "\n",
+ " if self.futr_exog_size > 0:\n",
+ " encoder_input = torch.cat((encoder_input, \n",
+ " futr_exog[:, :seq_len]), dim=2) # [B, L, 1 + X + S] + [B, L, F] -> [B, L, 1 + X + S + F]\n",
"\n",
" # DilatedRNN forward\n",
" for layer_num in range(len(self.rnn_stack)):\n",
@@ -535,25 +555,313 @@
" output += residual\n",
" encoder_input = output\n",
"\n",
- " if self.futr_exog_size > 0:\n",
- " futr_exog = futr_exog.permute(0,2,3,1)[:,:,1:,:] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]\n",
- " encoder_input = torch.cat(( encoder_input, futr_exog.reshape(batch_size, seq_len, -1)), dim=2)\n",
- "\n",
" # Context adapter\n",
- " context = self.context_adapter(encoder_input)\n",
- " context = context.reshape(batch_size, seq_len, self.h, self.context_size)\n",
+ " output = output.permute(0, 2, 1) # [B, L, C] -> [B, C, L]\n",
+ " context = self.context_adapter(output) # [B, C, L] -> [B, C, h]\n",
"\n",
" # Residual connection with futr_exog\n",
" if self.futr_exog_size > 0:\n",
- " context = torch.cat((context, futr_exog), dim=-1)\n",
+ " futr_exog_futr = futr_exog[:, seq_len:].permute(0, 2, 1) # [B, h, F] -> [B, F, h]\n",
+ " context = torch.cat((context, futr_exog_futr), \n",
+ " dim=1) # [B, C, h] + [B, F, h] = [B, C + F, h]\n",
"\n",
" # Final forecast\n",
- " output = self.mlp_decoder(context)\n",
- " output = self.loss.domain_map(output)\n",
+ " context = context.permute(0, 2, 1) # [B, C + F, h] -> [B, h, C + F]\n",
+ " output = self.mlp_decoder(context) # [B, h, C + F] -> [B, h, n_output]\n",
" \n",
" return output"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "---\n",
+ "\n",
+ "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/dilated_rnn.py#L289){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+ "\n",
+ "### DilatedRNN\n",
+ "\n",
+ "> DilatedRNN (h:int, input_size:int, inference_input_size:int=-1,\n",
+ "> cell_type:str='LSTM', dilations:List[List[int]]=[[1, 2], [4,\n",
+ "> 8]], encoder_hidden_size:int=200, context_size:int=10,\n",
+ "> decoder_hidden_size:int=200, decoder_layers:int=2,\n",
+ "> futr_exog_list=None, hist_exog_list=None,\n",
+ "> stat_exog_list=None, exclude_insample_y=False, loss=MAE(),\n",
+ "> valid_loss=None, max_steps:int=1000,\n",
+ "> learning_rate:float=0.001, num_lr_decays:int=3,\n",
+ "> early_stop_patience_steps:int=-1, val_check_steps:int=100,\n",
+ "> batch_size=32, valid_batch_size:Optional[int]=None,\n",
+ "> windows_batch_size=1024, inference_windows_batch_size=1024,\n",
+ "> start_padding_enabled=False, step_size:int=1,\n",
+ "> scaler_type:str='robust', random_seed:int=1,\n",
+ "> num_workers_loader:int=0, drop_last_loader:bool=False,\n",
+ "> optimizer=None, optimizer_kwargs=None, lr_scheduler=None,\n",
+ "> lr_scheduler_kwargs=None, **trainer_kwargs)\n",
+ "\n",
+ "*DilatedRNN\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`h`: int, forecast horizon.
\n",
+ "`input_size`: int, maximum sequence length for truncated train backpropagation. Default -1 uses all history.
\n",
+ "`inference_input_size`: int, maximum sequence length for truncated inference. Default -1 uses all history.
\n",
+ "`cell_type`: str, type of RNN cell to use. Options: 'GRU', 'RNN', 'LSTM', 'ResLSTM', 'AttentiveLSTM'.
\n",
+ "`dilations`: int list, dilations between layers.
\n",
+ "`encoder_hidden_size`: int=200, units for the RNN's hidden state size.
\n",
+ "`context_size`: int=10, size of context vector for each timestamp on the forecasting window.
\n",
+ "`decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.
\n",
+ "`decoder_layers`: int=2, number of layers for the MLP decoder.
\n",
+ "`futr_exog_list`: str list, future exogenous columns.
\n",
+ "`hist_exog_list`: str list, historic exogenous columns.
\n",
+ "`stat_exog_list`: str list, static exogenous columns.
\n",
+ "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`max_steps`: int, maximum number of training steps.
\n",
+ "`learning_rate`: float, Learning rate between (0, 1).
\n",
+ "`num_lr_decays`: int, Number of learning rate decays, evenly distributed across max_steps.
\n",
+ "`early_stop_patience_steps`: int, Number of validation iterations before early stopping.
\n",
+ "`val_check_steps`: int, Number of training steps between every validation loss check.
\n",
+ "`batch_size`: int=32, number of different series in each batch.
\n",
+ "`valid_batch_size`: int=None, number of different series in each validation and test batch.
\n",
+ "`step_size`: int=1, step size between each window of temporal data.
\n",
+ "`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
+ "`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
+ "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
+ "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n",
+ "`alias`: str, optional, Custom name of the model.
\n",
+ "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n",
+ "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n",
+ "`lr_scheduler`: Subclass of 'torch.optim.lr_scheduler.LRScheduler', optional, user specified lr_scheduler instead of the default choice (StepLR).
\n",
+ "`lr_scheduler_kwargs`: dict, optional, list of parameters used by the user specified `lr_scheduler`.
\n",
+ "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
*"
+ ],
+ "text/plain": [
+ "---\n",
+ "\n",
+ "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/dilated_rnn.py#L289){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+ "\n",
+ "### DilatedRNN\n",
+ "\n",
+ "> DilatedRNN (h:int, input_size:int, inference_input_size:int=-1,\n",
+ "> cell_type:str='LSTM', dilations:List[List[int]]=[[1, 2], [4,\n",
+ "> 8]], encoder_hidden_size:int=200, context_size:int=10,\n",
+ "> decoder_hidden_size:int=200, decoder_layers:int=2,\n",
+ "> futr_exog_list=None, hist_exog_list=None,\n",
+ "> stat_exog_list=None, exclude_insample_y=False, loss=MAE(),\n",
+ "> valid_loss=None, max_steps:int=1000,\n",
+ "> learning_rate:float=0.001, num_lr_decays:int=3,\n",
+ "> early_stop_patience_steps:int=-1, val_check_steps:int=100,\n",
+ "> batch_size=32, valid_batch_size:Optional[int]=None,\n",
+ "> windows_batch_size=1024, inference_windows_batch_size=1024,\n",
+ "> start_padding_enabled=False, step_size:int=1,\n",
+ "> scaler_type:str='robust', random_seed:int=1,\n",
+ "> num_workers_loader:int=0, drop_last_loader:bool=False,\n",
+ "> optimizer=None, optimizer_kwargs=None, lr_scheduler=None,\n",
+ "> lr_scheduler_kwargs=None, **trainer_kwargs)\n",
+ "\n",
+ "*DilatedRNN\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`h`: int, forecast horizon.
\n",
+ "`input_size`: int, maximum sequence length for truncated train backpropagation. Default -1 uses all history.
\n",
+ "`inference_input_size`: int, maximum sequence length for truncated inference. Default -1 uses all history.
\n",
+ "`cell_type`: str, type of RNN cell to use. Options: 'GRU', 'RNN', 'LSTM', 'ResLSTM', 'AttentiveLSTM'.
\n",
+ "`dilations`: int list, dilations between layers.
\n",
+ "`encoder_hidden_size`: int=200, units for the RNN's hidden state size.
\n",
+ "`context_size`: int=10, size of context vector for each timestamp on the forecasting window.
\n",
+ "`decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.
\n",
+ "`decoder_layers`: int=2, number of layers for the MLP decoder.
\n",
+ "`futr_exog_list`: str list, future exogenous columns.
\n",
+ "`hist_exog_list`: str list, historic exogenous columns.
\n",
+ "`stat_exog_list`: str list, static exogenous columns.
\n",
+ "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n",
+ "`max_steps`: int, maximum number of training steps.
\n",
+ "`learning_rate`: float, Learning rate between (0, 1).
\n",
+ "`num_lr_decays`: int, Number of learning rate decays, evenly distributed across max_steps.
\n",
+ "`early_stop_patience_steps`: int, Number of validation iterations before early stopping.
\n",
+ "`val_check_steps`: int, Number of training steps between every validation loss check.
\n",
+ "`batch_size`: int=32, number of different series in each batch.
\n",
+ "`valid_batch_size`: int=None, number of different series in each validation and test batch.
\n",
+ "`step_size`: int=1, step size between each window of temporal data.
\n",
+ "`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
+ "`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
+ "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n",
+ "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n",
+ "`alias`: str, optional, Custom name of the model.
\n",
+ "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n",
+ "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n",
+ "`lr_scheduler`: Subclass of 'torch.optim.lr_scheduler.LRScheduler', optional, user specified lr_scheduler instead of the default choice (StepLR).
\n",
+ "`lr_scheduler_kwargs`: dict, optional, list of parameters used by the user specified `lr_scheduler`.
\n",
+ "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lightning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
*"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "show_doc(DilatedRNN)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "---\n",
+ "\n",
+ "### DilatedRNN.fit\n",
+ "\n",
+ "> DilatedRNN.fit (dataset, val_size=0, test_size=0, random_seed=None,\n",
+ "> distributed_config=None)\n",
+ "\n",
+ "*Fit.\n",
+ "\n",
+ "The `fit` method, optimizes the neural network's weights using the\n",
+ "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
+ "and the `loss` function as defined during the initialization.\n",
+ "Within `fit` we use a PyTorch Lightning `Trainer` that\n",
+ "inherits the initialization's `self.trainer_kwargs`, to customize\n",
+ "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
+ "\n",
+ "The method is designed to be compatible with SKLearn-like classes\n",
+ "and in particular to be compatible with the StatsForecast library.\n",
+ "\n",
+ "By default the `model` is not saving training checkpoints to protect\n",
+ "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`val_size`: int, validation size for temporal cross-validation.
\n",
+ "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n",
+ "`test_size`: int, test size for temporal cross-validation.
*"
+ ],
+ "text/plain": [
+ "---\n",
+ "\n",
+ "### DilatedRNN.fit\n",
+ "\n",
+ "> DilatedRNN.fit (dataset, val_size=0, test_size=0, random_seed=None,\n",
+ "> distributed_config=None)\n",
+ "\n",
+ "*Fit.\n",
+ "\n",
+ "The `fit` method, optimizes the neural network's weights using the\n",
+ "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
+ "and the `loss` function as defined during the initialization.\n",
+ "Within `fit` we use a PyTorch Lightning `Trainer` that\n",
+ "inherits the initialization's `self.trainer_kwargs`, to customize\n",
+ "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
+ "\n",
+ "The method is designed to be compatible with SKLearn-like classes\n",
+ "and in particular to be compatible with the StatsForecast library.\n",
+ "\n",
+ "By default the `model` is not saving training checkpoints to protect\n",
+ "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`val_size`: int, validation size for temporal cross-validation.
\n",
+ "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n",
+ "`test_size`: int, test size for temporal cross-validation.
*"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "show_doc(DilatedRNN.fit, name='DilatedRNN.fit')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "---\n",
+ "\n",
+ "### DilatedRNN.predict\n",
+ "\n",
+ "> DilatedRNN.predict (dataset, test_size=None, step_size=1,\n",
+ "> random_seed=None, **data_module_kwargs)\n",
+ "\n",
+ "*Predict.\n",
+ "\n",
+ "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`test_size`: int=None, test size for temporal cross-validation.
\n",
+ "`step_size`: int=1, Step size between each window.
\n",
+ "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n",
+ "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).*"
+ ],
+ "text/plain": [
+ "---\n",
+ "\n",
+ "### DilatedRNN.predict\n",
+ "\n",
+ "> DilatedRNN.predict (dataset, test_size=None, step_size=1,\n",
+ "> random_seed=None, **data_module_kwargs)\n",
+ "\n",
+ "*Predict.\n",
+ "\n",
+ "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n",
+ "`test_size`: int=None, test size for temporal cross-validation.
\n",
+ "`step_size`: int=1, Step size between each window.
\n",
+ "`random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
\n",
+ "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).*"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "show_doc(DilatedRNN.predict, name='DilatedRNN.predict')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DilatedRNN: checking forecast AirPassengers dataset\n"
+ ]
+ }
+ ],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(DilatedRNN, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -565,7 +873,124 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\ospra\\OneDrive\\Nixtla\\Repositories\\neuralforecast\\neuralforecast\\common\\_base_model.py:134: UserWarning: Input size too small. Automatically setting input size to 3 * horizon = 36\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "c575af1dd4b545f1a017aa6edc64a115",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Sanity Checking: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1e680f1712194b2fa69c3669284867db",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Training: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "e398bdd3c29d4cb1a2cdf258edb3d0c0",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "372f289b0dd64143a31120abab6a85fa",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Validation: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\utilsforecast\\processing.py:384: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n",
+ " freq = pd.tseries.frequencies.to_offset(freq)\n",
+ "c:\\Users\\ospra\\miniconda3\\envs\\neuralforecast\\lib\\site-packages\\utilsforecast\\processing.py:438: FutureWarning: 'M' is deprecated and will be removed in a future version, please use 'ME' instead.\n",
+ " freq = pd.tseries.frequencies.to_offset(freq)\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3e0bf257d4b2449eaa43ab1859bde8f1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Predicting: | | 0/? [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\ospra\\OneDrive\\Nixtla\\Repositories\\neuralforecast\\neuralforecast\\core.py:213: FutureWarning: In a future version the predictions will have the id as a column. You can set the `NIXTLA_ID_AS_COL` environment variable to adopt the new behavior and to suppress this warning.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"#| eval: false\n",
"import pandas as pd\n",
diff --git a/nbs/models.dlinear.ipynb b/nbs/models.dlinear.ipynb
index ea1a38a43..4191d5e96 100644
--- a/nbs/models.dlinear.ipynb
+++ b/nbs/models.dlinear.ipynb
@@ -58,7 +58,7 @@
"import torch\n",
"import torch.nn as nn\n",
"\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -70,8 +70,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -135,7 +138,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class DLinear(BaseWindows):\n",
+ "class DLinear(BaseModel):\n",
" \"\"\" DLinear\n",
"\n",
" *Parameters:*
\n",
@@ -173,10 +176,11 @@
"\t- Zeng, Ailing, et al. \"Are transformers effective for time series forecasting?.\" Proceedings of the AAAI conference on artificial intelligence. Vol. 37. No. 9. 2023.\"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -256,11 +260,7 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
- " insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
- " #futr_exog = windows_batch['futr_exog']\n",
+ " insample_y = windows_batch['insample_y'].squeeze(-1)\n",
"\n",
" # Parse inputs\n",
" batch_size = len(insample_y)\n",
@@ -272,7 +272,6 @@
" # Final\n",
" forecast = trend_part + seasonal_part\n",
" forecast = forecast.reshape(batch_size, self.h, self.loss.outputsize_multiplier)\n",
- " forecast = self.loss.domain_map(forecast)\n",
" return forecast"
]
},
@@ -303,6 +302,21 @@
"show_doc(DLinear.predict, name='DLinear.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(DLinear, [\"airpassengers\"])"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
@@ -322,7 +336,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"from neuralforecast import NeuralForecast\n",
- "from neuralforecast.models import DLinear\n",
+ "from neuralforecast.models import DLinear\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, augment_calendar_df\n",
"\n",
"AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
diff --git a/nbs/models.fedformer.ipynb b/nbs/models.fedformer.ipynb
index 2268c058d..5ef61687b 100644
--- a/nbs/models.fedformer.ipynb
+++ b/nbs/models.fedformer.ipynb
@@ -51,6 +51,20 @@
"![Figure 1. FEDformer Architecture.](imgs_models/fedformer.png)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "import logging\n",
+ "import warnings\n",
+ "from fastcore.test import test_eq\n",
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -67,7 +81,7 @@
"\n",
"from neuralforecast.common._modules import DataEmbedding\n",
"from neuralforecast.common._modules import SeriesDecomp\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -402,7 +416,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class FEDformer(BaseWindows):\n",
+ "class FEDformer(BaseModel):\n",
" \"\"\" FEDformer\n",
"\n",
" The FEDformer model tackles the challenge of finding reliable dependencies on intricate temporal patterns of long-horizon forecasting.\n",
@@ -460,10 +474,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -626,13 +641,9 @@
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
" insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
" futr_exog = windows_batch['futr_exog']\n",
"\n",
" # Parse inputs\n",
- " insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
" if self.futr_exog_size > 0:\n",
" x_mark_enc = futr_exog[:,:self.input_size,:]\n",
" x_mark_dec = futr_exog[:,-(self.label_len+self.h):,:]\n",
@@ -659,11 +670,60 @@
" trend=trend_init)\n",
" # final\n",
" dec_out = trend_part + seasonal_part\n",
- "\n",
- " forecast = self.loss.domain_map(dec_out[:, -self.h:])\n",
+ " forecast = dec_out[:, -self.h:]\n",
+ " \n",
" return forecast"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "show_doc(FEDformer)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "show_doc(FEDformer.fit, name='FEDformer.fit')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "show_doc(FEDformer.predict, name='FEDformer.predict')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(FEDformer, [\"airpassengers\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Usage Example"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
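@@ -682,6 +742,7 @@
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",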
+ "\n",
"model = FEDformer(h=12,\n",
" input_size=24,\n",
" modes=64,\n",
diff --git a/nbs/models.gru.ipynb b/nbs/models.gru.ipynb
index 7f0608a5f..0793c37be 100644
--- a/nbs/models.gru.ipynb
+++ b/nbs/models.gru.ipynb
@@ -69,7 +69,10 @@
"outputs": [],
"source": [
"#| hide\n",
- "from nbdev.showdoc import show_doc"
+ "import logging\n",
+ "from fastcore.test import test_eq\n",
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -84,9 +87,10 @@
"\n",
"import torch\n",
"import torch.nn as nn\n",
+ "import warnings\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_recurrent import BaseRecurrent\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import MLP"
]
},
@@ -97,7 +101,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class GRU(BaseRecurrent):\n",
+ "class GRU(BaseModel):\n",
" \"\"\" GRU\n",
"\n",
" Multi Layer Recurrent Network with Gated Units (GRU), and\n",
@@ -105,7 +109,7 @@
" using ADAM stochastic gradient descent. The network accepts static, historic \n",
" and future exogenous data, flattens the inputs.\n",
"\n",
- " **Parameters:**
\n",
+ " **Parameters:**
\n",
" `h`: int, forecast horizon.
\n",
" `input_size`: int, maximum sequence length for truncated train backpropagation. Default -1 uses all history.
\n",
" `inference_input_size`: int, maximum sequence length for truncated inference. Default -1 uses all history.
\n",
@@ -114,7 +118,7 @@
" `encoder_activation`: Optional[str]=None, Deprecated. Activation function in GRU is frozen in PyTorch.
\n",
" `encoder_bias`: bool=True, whether or not to use biases b_ih, b_hh within GRU units.
\n",
" `encoder_dropout`: float=0., dropout regularization applied to GRU outputs.
\n",
- " `context_size`: int=10, size of context vector for each timestamp on the forecasting window.
\n",
+ " `context_size`: deprecated.
\n",
" `decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.
\n",
" `decoder_layers`: int=2, number of layers for the MLP decoder.
\n",
" `futr_exog_list`: str list, future exogenous columns.
\n",
@@ -142,10 +146,11 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'recurrent'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = True # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int,\n",
@@ -156,12 +161,14 @@
" encoder_activation: Optional[str] = None,\n",
" encoder_bias: bool = True,\n",
" encoder_dropout: float = 0.,\n",
- " context_size: int = 10,\n",
- " decoder_hidden_size: int = 200,\n",
+ " context_size: Optional[int] = None,\n",
+ " decoder_hidden_size: int = 128,\n",
" decoder_layers: int = 2,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
+ " recurrent = False,\n",
" loss = MAE(),\n",
" valid_loss = None,\n",
" max_steps: int = 1000,\n",
@@ -171,6 +178,10 @@
" val_check_steps: int = 100,\n",
" batch_size=32,\n",
" valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 128,\n",
+ " inference_windows_batch_size = 1024,\n",
+ " start_padding_enabled = False,\n",
+ " step_size: int = 1,\n",
" scaler_type: str='robust',\n",
" random_seed=1,\n",
" num_workers_loader=0,\n",
@@ -181,10 +192,16 @@
" lr_scheduler_kwargs = None,\n",
" dataloader_kwargs = None,\n",
" **trainer_kwargs):\n",
+ " \n",
+ " self.RECURRENT = recurrent\n",
+ "\n",
" super(GRU, self).__init__(\n",
" h=h,\n",
" input_size=input_size,\n",
- " inference_input_size=inference_input_size,\n",
+ " futr_exog_list=futr_exog_list,\n",
+ " hist_exog_list=hist_exog_list,\n",
+ " stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -194,13 +211,14 @@
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
" valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
+ " step_size=step_size,\n",
" scaler_type=scaler_type,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
+ " random_seed=random_seed,\n",
" num_workers_loader=num_workers_loader,\n",
" drop_last_loader=drop_last_loader,\n",
- " random_seed=random_seed,\n",
" optimizer=optimizer,\n",
" optimizer_kwargs=optimizer_kwargs,\n",
" lr_scheduler=lr_scheduler,\n",
@@ -224,75 +242,82 @@
" self.encoder_dropout = encoder_dropout\n",
" \n",
" # Context adapter\n",
- " self.context_size = context_size\n",
+ " if context_size is not None:\n",
+ " warnings.warn(\"context_size is deprecated and will be removed in future versions.\")\n",
"\n",
" # MLP decoder\n",
" self.decoder_hidden_size = decoder_hidden_size\n",
" self.decoder_layers = decoder_layers\n",
"\n",
" # RNN input size (1 for target variable y)\n",
- " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size\n",
+ " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size\n",
"\n",
" # Instantiate model\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = False\n",
" self.hist_encoder = nn.GRU(input_size=input_encoder,\n",
- " hidden_size=self.encoder_hidden_size,\n",
- " num_layers=self.encoder_n_layers,\n",
- " bias=self.encoder_bias,\n",
- " dropout=self.encoder_dropout,\n",
- " batch_first=True)\n",
- "\n",
- " # Context adapter\n",
- " self.context_adapter = nn.Linear(in_features=self.encoder_hidden_size + self.futr_exog_size * h,\n",
- " out_features=self.context_size * h)\n",
+ " hidden_size=self.encoder_hidden_size,\n",
+ " num_layers=self.encoder_n_layers,\n",
+ " bias=self.encoder_bias,\n",
+ " dropout=self.encoder_dropout,\n",
+ " batch_first=True)\n",
"\n",
" # Decoder MLP\n",
- " self.mlp_decoder = MLP(in_features=self.context_size + self.futr_exog_size,\n",
- " out_features=self.loss.outputsize_multiplier,\n",
- " hidden_size=self.decoder_hidden_size,\n",
- " num_layers=self.decoder_layers,\n",
- " activation='ReLU',\n",
- " dropout=0.0)\n",
+ " if self.RECURRENT:\n",
+ " self.proj = nn.Linear(self.encoder_hidden_size, self.loss.outputsize_multiplier)\n",
+ " else:\n",
+ " self.mlp_decoder = MLP(in_features=self.encoder_hidden_size + self.futr_exog_size,\n",
+ " out_features=self.loss.outputsize_multiplier,\n",
+ " hidden_size=self.decoder_hidden_size,\n",
+ " num_layers=self.decoder_layers,\n",
+ " activation='ReLU',\n",
+ " dropout=0.0)\n",
"\n",
" def forward(self, windows_batch):\n",
" \n",
" # Parse windows_batch\n",
- " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
- " futr_exog = windows_batch['futr_exog']\n",
- " hist_exog = windows_batch['hist_exog']\n",
- " stat_exog = windows_batch['stat_exog']\n",
+ " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
+ " futr_exog = windows_batch['futr_exog'] # [B, seq_len, F]\n",
+ " hist_exog = windows_batch['hist_exog'] # [B, seq_len, X]\n",
+ " stat_exog = windows_batch['stat_exog'] # [B, S]\n",
"\n",
- " # Concatenate y, historic and static inputs\n",
- " # [B, C, seq_len, 1] -> [B, seq_len, C]\n",
- " # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]\n",
+ " # Concatenate y, historic and static inputs \n",
" batch_size, seq_len = encoder_input.shape[:2]\n",
" if self.hist_exog_size > 0:\n",
- " hist_exog = hist_exog.permute(0,2,1,3).squeeze(-1) # [B, X, seq_len, 1] -> [B, seq_len, X]\n",
- " encoder_input = torch.cat((encoder_input, hist_exog), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, hist_exog), dim=2) # [B, seq_len, 1] + [B, seq_len, X] -> [B, seq_len, 1 + X]\n",
"\n",
" if self.stat_exog_size > 0:\n",
- " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
- " encoder_input = torch.cat((encoder_input, stat_exog), dim=2)\n",
- "\n",
- " # RNN forward\n",
- " hidden_state, _ = self.hist_encoder(encoder_input) # [B, seq_len, rnn_hidden_state]\n",
+ " # print(encoder_input.shape)\n",
+ " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
+ " encoder_input = torch.cat((encoder_input, stat_exog), dim=2) # [B, seq_len, 1 + X] + [B, seq_len, S] -> [B, seq_len, 1 + X + S]\n",
"\n",
" if self.futr_exog_size > 0:\n",
- " futr_exog = futr_exog.permute(0,2,3,1)[:,:,1:,:] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]\n",
- " hidden_state = torch.cat(( hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, \n",
+ " futr_exog[:, :seq_len]), dim=2) # [B, seq_len, 1 + X + S] + [B, seq_len, F] -> [B, seq_len, 1 + X + S + F]\n",
"\n",
- " # Context adapter\n",
- " context = self.context_adapter(hidden_state)\n",
- " context = context.reshape(batch_size, seq_len, self.h, self.context_size)\n",
+ " if self.RECURRENT:\n",
+ " if self.maintain_state:\n",
+ " rnn_state = self.rnn_state\n",
+ " else:\n",
+ " rnn_state = None\n",
+ " \n",
+ " output, rnn_state = self.hist_encoder(encoder_input, \n",
+ " rnn_state) # [B, seq_len, rnn_hidden_state]\n",
+ " output = self.proj(output) # [B, seq_len, rnn_hidden_state] -> [B, seq_len, n_output]\n",
+ " if self.maintain_state:\n",
+ " self.rnn_state = rnn_state\n",
+ " else:\n",
+ " hidden_state, _ = self.hist_encoder(encoder_input, None) # [B, seq_len, rnn_hidden_state]\n",
+ " hidden_state = hidden_state[:, -self.h:] # [B, seq_len, rnn_hidden_state] -> [B, h, rnn_hidden_state]\n",
+ " \n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_futr = futr_exog[:, -self.h:] # [B, h, F]\n",
+ " hidden_state = torch.cat((hidden_state, \n",
+ " futr_exog_futr), dim=-1) # [B, h, rnn_hidden_state] + [B, h, F] -> [B, h, rnn_hidden_state + F]\n",
"\n",
- " # Residual connection with futr_exog\n",
- " if self.futr_exog_size > 0:\n",
- " context = torch.cat((context, futr_exog), dim=-1)\n",
+ " output = self.mlp_decoder(hidden_state) # [B, h, rnn_hidden_state + F] -> [B, h, n_output]\n",
"\n",
- " # Final forecast\n",
- " output = self.mlp_decoder(context)\n",
- " output = self.loss.domain_map(output)\n",
- " \n",
- " return output"
+ " return output[:, -self.h:]"
]
},
{
@@ -322,6 +347,21 @@
"show_doc(GRU.predict, name='GRU.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(GRU, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -343,17 +383,15 @@
"# from neuralforecast.models import GRU\n",
"from neuralforecast.losses.pytorch import DistributionLoss\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
"\n",
"fcst = NeuralForecast(\n",
- " models=[GRU(h=12,input_size=-1,\n",
+ " models=[GRU(h=12, input_size=24,\n",
" loss=DistributionLoss(distribution='Normal', level=[80, 90]),\n",
" scaler_type='robust',\n",
" encoder_n_layers=2,\n",
" encoder_hidden_size=128,\n",
- " context_size=10,\n",
" decoder_hidden_size=128,\n",
" decoder_layers=2,\n",
" max_steps=200,\n",
diff --git a/nbs/models.informer.ipynb b/nbs/models.informer.ipynb
index c8e30137c..3efdeb344 100644
--- a/nbs/models.informer.ipynb
+++ b/nbs/models.informer.ipynb
@@ -71,7 +71,7 @@
" TransDecoderLayer, TransDecoder,\n",
" DataEmbedding, AttentionLayer,\n",
")\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -83,8 +83,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -259,7 +262,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class Informer(BaseWindows):\n",
+ "class Informer(BaseModel):\n",
" \"\"\" Informer\n",
"\n",
"\tThe Informer model tackles the vanilla Transformer computational complexity challenges for long-horizon forecasting. \n",
@@ -317,10 +320,11 @@
"\t- [Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, Wancai Zhang. \"Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting\"](https://arxiv.org/abs/2012.07436)
\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False\n",
+ " RECURRENT = False\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -463,17 +467,11 @@
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
" insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
- "\n",
" futr_exog = windows_batch['futr_exog']\n",
"\n",
- " insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
- "\n",
" if self.futr_exog_size > 0:\n",
- " x_mark_enc = futr_exog[:,:self.input_size,:]\n",
- " x_mark_dec = futr_exog[:,-(self.label_len+self.h):,:]\n",
+ " x_mark_enc = futr_exog[:, :self.input_size, :]\n",
+ " x_mark_dec = futr_exog[:, -(self.label_len+self.h):, :]\n",
" else:\n",
" x_mark_enc = None\n",
" x_mark_dec = None\n",
@@ -488,7 +486,7 @@
" dec_out = self.decoder(dec_out, enc_out, x_mask=None, \n",
" cross_mask=None)\n",
"\n",
- " forecast = self.loss.domain_map(dec_out[:, -self.h:])\n",
+ " forecast = dec_out[:, -self.h:]\n",
" return forecast"
]
},
@@ -519,6 +517,21 @@
"show_doc(Informer.predict, name='Informer.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(Informer, [\"airpassengers\"])"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
@@ -555,7 +568,7 @@
" futr_exog_list=calendar_cols,\n",
" scaler_type='robust',\n",
" learning_rate=1e-3,\n",
- " max_steps=5,\n",
+ " max_steps=200,\n",
" val_check_steps=50,\n",
" early_stop_patience_steps=2)\n",
"\n",
diff --git a/nbs/models.ipynb b/nbs/models.ipynb
index 018525399..e3a3342a0 100644
--- a/nbs/models.ipynb
+++ b/nbs/models.ipynb
@@ -229,10 +229,10 @@
" \"input_size_multiplier\": [-1, 4, 16, 64],\n",
" \"inference_input_size_multiplier\": [-1],\n",
" \"h\": None,\n",
- " \"encoder_hidden_size\": tune.choice([50, 100, 200, 300]),\n",
+ " \"encoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"encoder_n_layers\": tune.randint(1, 4),\n",
" \"context_size\": tune.choice([5, 10, 50]),\n",
- " \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
+ " \"decoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"max_steps\": tune.choice([500, 1000]),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
@@ -314,7 +314,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoRNN.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
"model = AutoRNN(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"model.fit(dataset=dataset)\n",
@@ -372,10 +372,10 @@
" \"input_size_multiplier\": [-1, 4, 16, 64],\n",
" \"inference_input_size_multiplier\": [-1],\n",
" \"h\": None,\n",
- " \"encoder_hidden_size\": tune.choice([50, 100, 200, 300]),\n",
+ " \"encoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"encoder_n_layers\": tune.randint(1, 4),\n",
" \"context_size\": tune.choice([5, 10, 50]),\n",
- " \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
+ " \"decoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"max_steps\": tune.choice([500, 1000]),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
@@ -452,7 +452,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoLSTM.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
"model = AutoLSTM(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -511,10 +511,10 @@
" \"input_size_multiplier\": [-1, 4, 16, 64],\n",
" \"inference_input_size_multiplier\": [-1],\n",
" \"h\": None,\n",
- " \"encoder_hidden_size\": tune.choice([50, 100, 200, 300]),\n",
+ " \"encoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"encoder_n_layers\": tune.randint(1, 4),\n",
" \"context_size\": tune.choice([5, 10, 50]),\n",
- " \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
+ " \"decoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"max_steps\": tune.choice([500, 1000]),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
@@ -591,7 +591,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoGRU.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
"model = AutoGRU(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -650,9 +650,9 @@
" \"input_size_multiplier\": [-1, 4, 16, 64],\n",
" \"inference_input_size_multiplier\": [-1],\n",
" \"h\": None,\n",
- " \"encoder_hidden_size\": tune.choice([50, 100, 200, 300]),\n",
+ " \"encoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"context_size\": tune.choice([5, 10, 50]),\n",
- " \"decoder_hidden_size\": tune.choice([64, 128]),\n",
+ " \"decoder_hidden_size\": tune.choice([32, 64]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"max_steps\": tune.choice([500, 1000]),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
@@ -729,7 +729,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoTCN.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
"model = AutoTCN(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -927,10 +927,10 @@
" \"inference_input_size_multiplier\": [-1],\n",
" \"h\": None,\n",
" \"cell_type\": tune.choice(['LSTM', 'GRU']),\n",
- " \"encoder_hidden_size\": tune.choice([50, 100, 200, 300]),\n",
+ " \"encoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"dilations\": tune.choice([ [[1, 2], [4, 8]], [[1, 2, 4, 8]] ]),\n",
" \"context_size\": tune.choice([5, 10, 50]),\n",
- " \"decoder_hidden_size\": tune.choice([64, 128, 256, 512]),\n",
+ " \"decoder_hidden_size\": tune.choice([16, 32, 64, 128]),\n",
" \"learning_rate\": tune.loguniform(1e-4, 1e-1),\n",
" \"max_steps\": tune.choice([500, 1000]),\n",
" \"batch_size\": tune.choice([16, 32]),\n",
@@ -1007,7 +1007,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoDilatedRNN.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=-1, encoder_hidden_size=8)\n",
"model = AutoDilatedRNN(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -1290,7 +1290,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoMLP.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12, hidden_size=8)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12, hidden_size=8)\n",
"model = AutoMLP(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -1425,7 +1425,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoNBEATS.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12,\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12,\n",
" mlp_units=3*[[8, 8]])\n",
"model = AutoNBEATS(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
@@ -1561,7 +1561,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoNBEATSx.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12,\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12,\n",
" mlp_units=3*[[8, 8]])\n",
"model = AutoNBEATSx(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
@@ -1703,7 +1703,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoNHITS.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12, \n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12, \n",
" mlp_units=3 * [[8, 8]])\n",
"model = AutoNHITS(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
@@ -1841,7 +1841,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoDLinear.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12)\n",
"model = AutoDLinear(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -1976,7 +1976,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoNLinear.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12)\n",
"model = AutoNLinear(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -2119,7 +2119,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoTiDE.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12)\n",
"model = AutoTiDE(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -2257,7 +2257,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoDeepNPTS.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12)\n",
"model = AutoDeepNPTS(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
@@ -2403,7 +2403,7 @@
"source": [
"%%capture\n",
"# Use your own config or AutoKAN.default_config\n",
- "config = dict(max_steps=2, val_check_steps=1, input_size=12)\n",
+ "config = dict(max_steps=1, val_check_steps=1, input_size=12)\n",
"model = AutoKAN(h=12, config=config, num_samples=1, cpus=1)\n",
"\n",
"# Fit and predict\n",
diff --git a/nbs/models.itransformer.ipynb b/nbs/models.itransformer.ipynb
index 5e134cfa0..b226d66dc 100644
--- a/nbs/models.itransformer.ipynb
+++ b/nbs/models.itransformer.ipynb
@@ -27,8 +27,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -69,9 +72,9 @@
"import numpy as np\n",
"\n",
"from math import sqrt\n",
- "\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.common._modules import TransEncoder, TransEncoderLayer, AttentionLayer"
]
@@ -195,7 +198,7 @@
"source": [
"#| export\n",
"\n",
- "class iTransformer(BaseMultivariate):\n",
+ "class iTransformer(BaseModel):\n",
"\n",
" \"\"\" iTransformer\n",
"\n",
@@ -222,6 +225,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n",
" `batch_size`: int=32, number of different series in each batch.
\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n",
+ " `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
\n",
+ " `inference_windows_batch_size`: int=128, number of windows to sample in each inference batch, -1 uses all.
\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
@@ -240,10 +247,11 @@
" \"\"\"\n",
"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = True\n",
+ " RECURRENT = False\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -252,6 +260,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" hidden_size: int = 512,\n",
" n_heads: int = 8,\n",
" e_layers: int = 2,\n",
@@ -268,6 +277,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 128,\n",
+ " inference_windows_batch_size = 128,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -286,6 +299,7 @@
" stat_exog_list = None,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -294,6 +308,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -335,8 +353,8 @@
" norm_layer=torch.nn.LayerNorm(self.hidden_size)\n",
" )\n",
"\n",
- " self.projector = nn.Linear(self.hidden_size, h, bias=True)\n",
- " \n",
+ " self.projector = nn.Linear(self.hidden_size, h * self.loss.outputsize_multiplier, bias=True)\n",
+ "\n",
" def forecast(self, x_enc):\n",
" if self.use_norm:\n",
" # Normalization from Non-stationary Transformer\n",
@@ -363,8 +381,8 @@
"\n",
" if self.use_norm:\n",
" # De-Normalization from Non-stationary Transformer\n",
- " dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))\n",
- " dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))\n",
+ " dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h * self.loss.outputsize_multiplier, 1))\n",
+ " dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h * self.loss.outputsize_multiplier, 1))\n",
"\n",
" return dec_out\n",
" \n",
@@ -372,14 +390,11 @@
" insample_y = windows_batch['insample_y']\n",
"\n",
" y_pred = self.forecast(insample_y)\n",
- " y_pred = y_pred[:, -self.h:, :]\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
+ " y_pred = y_pred.reshape(insample_y.shape[0],\n",
+ " self.h,\n",
+ " -1)\n",
"\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " if y_pred.ndim == 2:\n",
- " return y_pred.unsqueeze(-1)\n",
- " else:\n",
- " return y_pred\n"
+ " return y_pred"
]
},
{
@@ -409,6 +424,21 @@
"show_doc(iTransformer.predict, name='iTransformer.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(iTransformer, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -448,7 +478,8 @@
" loss=MSE(),\n",
" valid_loss=MAE(),\n",
" early_stop_patience_steps=3,\n",
- " batch_size=32)\n",
+ " batch_size=32,\n",
+ " max_steps=100)\n",
"\n",
"fcst = NeuralForecast(models=[model], freq='M')\n",
"fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
diff --git a/nbs/models.kan.ipynb b/nbs/models.kan.ipynb
index ac7cc5e2b..003a8e3d0 100644
--- a/nbs/models.kan.ipynb
+++ b/nbs/models.kan.ipynb
@@ -61,8 +61,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -80,7 +83,7 @@
"import torch.nn.functional as F\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_windows import BaseWindows"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -318,7 +321,7 @@
"source": [
"#| export\n",
"\n",
- "class KAN(BaseWindows):\n",
+ "class KAN(BaseModel):\n",
" \"\"\" KAN\n",
"\n",
" Simple Kolmogorov-Arnold Network (KAN).\n",
@@ -372,10 +375,11 @@
" \"\"\"\n",
"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True \n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -495,7 +499,7 @@
" \n",
" def forward(self, windows_batch, update_grid=False):\n",
"\n",
- " insample_y = windows_batch['insample_y']\n",
+ " insample_y = windows_batch['insample_y'].squeeze(-1)\n",
" futr_exog = windows_batch['futr_exog']\n",
" hist_exog = windows_batch['hist_exog']\n",
" stat_exog = windows_batch['stat_exog']\n",
@@ -520,7 +524,6 @@
"\n",
" y_pred = y_pred.reshape(batch_size, self.h, \n",
" self.loss.outputsize_multiplier)\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
" return y_pred\n",
" "
]
@@ -552,6 +555,21 @@
"show_doc(KAN.predict, name='KAN.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(KAN, checks=[\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -574,7 +592,6 @@
"from neuralforecast.losses.pytorch import DistributionLoss\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
- "\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
"\n",
diff --git a/nbs/models.lstm.ipynb b/nbs/models.lstm.ipynb
index 3eb469306..954e53257 100644
--- a/nbs/models.lstm.ipynb
+++ b/nbs/models.lstm.ipynb
@@ -58,7 +58,10 @@
"outputs": [],
"source": [
"#| hide\n",
- "from nbdev.showdoc import show_doc"
+ "import logging\n",
+ "from fastcore.test import test_eq\n",
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -72,9 +75,10 @@
"\n",
"import torch\n",
"import torch.nn as nn\n",
+ "import warnings\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_recurrent import BaseRecurrent\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import MLP"
]
},
@@ -85,7 +89,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class LSTM(BaseRecurrent):\n",
+ "class LSTM(BaseModel):\n",
" \"\"\" LSTM\n",
"\n",
" LSTM encoder, with MLP decoder.\n",
@@ -101,7 +105,7 @@
" `encoder_hidden_size`: int=200, units for the LSTM's hidden state size.
\n",
" `encoder_bias`: bool=True, whether or not to use biases b_ih, b_hh within LSTM units.
\n",
" `encoder_dropout`: float=0., dropout regularization applied to LSTM outputs.
\n",
- " `context_size`: int=10, size of context vector for each timestamp on the forecasting window.
\n",
+ " `context_size`: deprecated.
\n",
" `decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.
\n",
" `decoder_layers`: int=2, number of layers for the MLP decoder.
\n",
" `futr_exog_list`: str list, future exogenous columns.
\n",
@@ -129,25 +133,27 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'recurrent'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = True # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int,\n",
- " input_size: int = -1,\n",
- " inference_input_size: int = -1,\n",
+ " input_size: int,\n",
" encoder_n_layers: int = 2,\n",
- " encoder_hidden_size: int = 200,\n",
+ " encoder_hidden_size: int = 128,\n",
" encoder_bias: bool = True,\n",
" encoder_dropout: float = 0.,\n",
- " context_size: int = 10,\n",
- " decoder_hidden_size: int = 200,\n",
+ " context_size: Optional[int] = None,\n",
+ " decoder_hidden_size: int = 128,\n",
" decoder_layers: int = 2,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
+ " recurrent = False,\n",
" loss = MAE(),\n",
" valid_loss = None,\n",
" max_steps: int = 1000,\n",
@@ -157,6 +163,10 @@
" val_check_steps: int = 100,\n",
" batch_size = 32,\n",
" valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 128,\n",
+ " inference_windows_batch_size = 1024,\n",
+ " start_padding_enabled = False,\n",
+ " step_size: int = 1,\n",
" scaler_type: str = 'robust',\n",
" random_seed = 1,\n",
" num_workers_loader = 0,\n",
@@ -167,10 +177,16 @@
" lr_scheduler_kwargs = None,\n",
" dataloader_kwargs = None,\n",
" **trainer_kwargs):\n",
+ " \n",
+ " self.RECURRENT = recurrent\n",
+ " \n",
" super(LSTM, self).__init__(\n",
" h=h,\n",
" input_size=input_size,\n",
- " inference_input_size=inference_input_size,\n",
+ " futr_exog_list=futr_exog_list,\n",
+ " hist_exog_list=hist_exog_list,\n",
+ " stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -180,13 +196,14 @@
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
" valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
+ " step_size=step_size,\n",
" scaler_type=scaler_type,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
+ " random_seed=random_seed,\n",
" num_workers_loader=num_workers_loader,\n",
" drop_last_loader=drop_last_loader,\n",
- " random_seed=random_seed,\n",
" optimizer=optimizer,\n",
" optimizer_kwargs=optimizer_kwargs,\n",
" lr_scheduler=lr_scheduler,\n",
@@ -202,75 +219,80 @@
" self.encoder_dropout = encoder_dropout\n",
" \n",
" # Context adapter\n",
- " self.context_size = context_size\n",
+ " if context_size is not None:\n",
+ " warnings.warn(\"context_size is deprecated and will be removed in future versions.\")\n",
"\n",
" # MLP decoder\n",
" self.decoder_hidden_size = decoder_hidden_size\n",
" self.decoder_layers = decoder_layers\n",
"\n",
" # LSTM input size (1 for target variable y)\n",
- " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size\n",
+ " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size\n",
"\n",
" # Instantiate model\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = False\n",
" self.hist_encoder = nn.LSTM(input_size=input_encoder,\n",
" hidden_size=self.encoder_hidden_size,\n",
" num_layers=self.encoder_n_layers,\n",
" bias=self.encoder_bias,\n",
" dropout=self.encoder_dropout,\n",
- " batch_first=True)\n",
- "\n",
- " # Context adapter\n",
- " self.context_adapter = nn.Linear(in_features=self.encoder_hidden_size + self.futr_exog_size * h,\n",
- " out_features=self.context_size * h)\n",
+ " batch_first=True,\n",
+ " proj_size=self.loss.outputsize_multiplier if self.RECURRENT else 0)\n",
"\n",
" # Decoder MLP\n",
- " self.mlp_decoder = MLP(in_features=self.context_size + self.futr_exog_size,\n",
- " out_features=self.loss.outputsize_multiplier,\n",
- " hidden_size=self.decoder_hidden_size,\n",
- " num_layers=self.decoder_layers,\n",
- " activation='ReLU',\n",
- " dropout=0.0)\n",
+ " if not self.RECURRENT:\n",
+ " self.mlp_decoder = MLP(in_features=self.encoder_hidden_size + self.futr_exog_size,\n",
+ " out_features=self.loss.outputsize_multiplier,\n",
+ " hidden_size=self.decoder_hidden_size,\n",
+ " num_layers=self.decoder_layers,\n",
+ " activation='ReLU',\n",
+ " dropout=0.0)\n",
"\n",
" def forward(self, windows_batch):\n",
" \n",
" # Parse windows_batch\n",
- " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
- " futr_exog = windows_batch['futr_exog']\n",
- " hist_exog = windows_batch['hist_exog']\n",
- " stat_exog = windows_batch['stat_exog']\n",
+ " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
+ " futr_exog = windows_batch['futr_exog'] # [B, seq_len, F]\n",
+ " hist_exog = windows_batch['hist_exog'] # [B, seq_len, X]\n",
+ " stat_exog = windows_batch['stat_exog'] # [B, S]\n",
"\n",
- " # Concatenate y, historic and static inputs\n",
- " # [B, C, seq_len, 1] -> [B, seq_len, C]\n",
- " # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]\n",
+ " # Concatenate y, historic and static inputs \n",
" batch_size, seq_len = encoder_input.shape[:2]\n",
" if self.hist_exog_size > 0:\n",
- " hist_exog = hist_exog.permute(0,2,1,3).squeeze(-1) # [B, X, seq_len, 1] -> [B, seq_len, X]\n",
- " encoder_input = torch.cat((encoder_input, hist_exog), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, hist_exog), dim=2) # [B, seq_len, 1] + [B, seq_len, X] -> [B, seq_len, 1 + X]\n",
"\n",
" if self.stat_exog_size > 0:\n",
- " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
- " encoder_input = torch.cat((encoder_input, stat_exog), dim=2)\n",
- "\n",
- " # RNN forward\n",
- " hidden_state, _ = self.hist_encoder(encoder_input) # [B, seq_len, rnn_hidden_state]\n",
+ " # print(encoder_input.shape)\n",
+ " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
+ " encoder_input = torch.cat((encoder_input, stat_exog), dim=2) # [B, seq_len, 1 + X] + [B, seq_len, S] -> [B, seq_len, 1 + X + S]\n",
"\n",
" if self.futr_exog_size > 0:\n",
- " futr_exog = futr_exog.permute(0,2,3,1)[:,:,1:,:] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]\n",
- " hidden_state = torch.cat(( hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, \n",
+ " futr_exog[:, :seq_len]), dim=2) # [B, seq_len, 1 + X + S] + [B, seq_len, F] -> [B, seq_len, 1 + X + S + F]\n",
"\n",
- " # Context adapter\n",
- " context = self.context_adapter(hidden_state)\n",
- " context = context.reshape(batch_size, seq_len, self.h, self.context_size)\n",
+ " if self.RECURRENT:\n",
+ " if self.maintain_state:\n",
+ " rnn_state = self.rnn_state\n",
+ " else:\n",
+ " rnn_state = None\n",
+ " \n",
+ " output, rnn_state = self.hist_encoder(encoder_input, \n",
+ " rnn_state) # [B, seq_len, n_output]\n",
+ " if self.maintain_state:\n",
+ " self.rnn_state = rnn_state\n",
+ " else:\n",
+ " hidden_state, _ = self.hist_encoder(encoder_input, None) # [B, seq_len, rnn_hidden_state]\n",
+ " hidden_state = hidden_state[:, -self.h:] # [B, seq_len, rnn_hidden_state] -> [B, h, rnn_hidden_state]\n",
+ " \n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_futr = futr_exog[:, -self.h:] # [B, h, F]\n",
+ " hidden_state = torch.cat((hidden_state, \n",
+ " futr_exog_futr), dim=-1) # [B, h, rnn_hidden_state] + [B, h, F] -> [B, h, rnn_hidden_state + F]\n",
"\n",
- " # Residual connection with futr_exog\n",
- " if self.futr_exog_size > 0:\n",
- " context = torch.cat((context, futr_exog), dim=-1)\n",
+ " output = self.mlp_decoder(hidden_state) # [B, h, rnn_hidden_state + F] -> [B, h, n_output]\n",
"\n",
- " # Final forecast\n",
- " output = self.mlp_decoder(context)\n",
- " output = self.loss.domain_map(output)\n",
- " \n",
- " return output"
+ " return output[:, -self.h:]"
]
},
{
@@ -300,6 +322,21 @@
"show_doc(LSTM.predict, name='LSTM.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(LSTM, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -326,17 +363,18 @@
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
"\n",
"nf = NeuralForecast(\n",
- " models=[LSTM(h=12, input_size=-1,\n",
- " loss=DistributionLoss(distribution='Normal', level=[80, 90]),\n",
+ " models=[LSTM(h=12, \n",
+ " input_size=24,\n",
+ " loss=DistributionLoss(distribution=\"Normal\", level=[80, 90]),\n",
" scaler_type='robust',\n",
" encoder_n_layers=2,\n",
" encoder_hidden_size=128,\n",
- " context_size=10,\n",
" decoder_hidden_size=128,\n",
" decoder_layers=2,\n",
" max_steps=200,\n",
" futr_exog_list=['y_[lag12]'],\n",
" stat_exog_list=['airline1'],\n",
+ " recurrent=False,\n",
" )\n",
" ],\n",
" freq='M'\n",
@@ -344,19 +382,18 @@
"nf.fit(df=Y_train_df, static_df=AirPassengersStatic)\n",
"Y_hat_df = nf.predict(futr_df=Y_test_df)\n",
"\n",
+ "# Plots\n",
"Y_hat_df = Y_hat_df.reset_index(drop=False).drop(columns=['unique_id','ds'])\n",
"plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n",
"plot_df = pd.concat([Y_train_df, plot_df])\n",
"\n",
"plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n",
"plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n",
- "plt.plot(plot_df['ds'], plot_df['LSTM'], c='purple', label='mean')\n",
"plt.plot(plot_df['ds'], plot_df['LSTM-median'], c='blue', label='median')\n",
"plt.fill_between(x=plot_df['ds'][-12:], \n",
- " y1=plot_df['LSTM-lo-90'][-12:].values, \n",
+ " y1=plot_df['LSTM-lo-90'][-12:].values,\n",
" y2=plot_df['LSTM-hi-90'][-12:].values,\n",
" alpha=0.4, label='level 90')\n",
- "plt.legend()\n",
"plt.grid()\n",
"plt.plot()"
]
diff --git a/nbs/models.mlp.ipynb b/nbs/models.mlp.ipynb
index 46c09406f..848cd037c 100644
--- a/nbs/models.mlp.ipynb
+++ b/nbs/models.mlp.ipynb
@@ -49,8 +49,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -67,7 +70,7 @@
"import torch.nn as nn\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_windows import BaseWindows"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -78,7 +81,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class MLP(BaseWindows):\n",
+ "class MLP(BaseModel):\n",
" \"\"\" MLP\n",
"\n",
" Simple Multi Layer Perceptron architecture (MLP). \n",
@@ -122,10 +125,11 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
- " EXOGENOUS_STAT = True \n",
+ " EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -211,7 +215,7 @@
" def forward(self, windows_batch):\n",
"\n",
" # Parse windows_batch\n",
- " insample_y = windows_batch['insample_y']\n",
+ " insample_y = windows_batch['insample_y'].squeeze(-1)\n",
" futr_exog = windows_batch['futr_exog']\n",
" hist_exog = windows_batch['hist_exog']\n",
" stat_exog = windows_batch['stat_exog']\n",
@@ -235,7 +239,6 @@
"\n",
" y_pred = y_pred.reshape(batch_size, self.h, \n",
" self.loss.outputsize_multiplier)\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
" return y_pred"
]
},
@@ -269,6 +272,22 @@
"show_doc(MLP.predict, name='MLP.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a09d7a35",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(MLP, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -421,6 +440,7 @@
"fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
"forecasts = fcst.predict(futr_df=Y_test_df)\n",
"\n",
+ "# Plot predictions\n",
"Y_hat_df = forecasts.reset_index(drop=False).drop(columns=['unique_id','ds'])\n",
"plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n",
"plot_df = pd.concat([Y_train_df, plot_df])\n",
diff --git a/nbs/models.mlpmultivariate.ipynb b/nbs/models.mlpmultivariate.ipynb
index 71abdfb04..d06f3034b 100644
--- a/nbs/models.mlpmultivariate.ipynb
+++ b/nbs/models.mlpmultivariate.ipynb
@@ -49,8 +49,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -64,8 +67,9 @@
"import torch\n",
"import torch.nn as nn\n",
"\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -76,7 +80,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class MLPMultivariate(BaseMultivariate):\n",
+ "class MLPMultivariate(BaseModel):\n",
" \"\"\" MLPMultivariate\n",
"\n",
" Simple Multi Layer Perceptron architecture (MLP) for multivariate forecasting. \n",
@@ -102,6 +106,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.<br>\n",
" `batch_size`: int=32, number of different series in each batch.<br>\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.<br>\n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, default uses all.<br>\n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.<br>\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
" `step_size`: int=1, step size between each window of temporal data.<br>\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -116,10 +124,11 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True \n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -128,6 +137,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" num_layers = 2,\n",
" hidden_size = 1024,\n",
" loss = MAE(),\n",
@@ -138,6 +148,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -157,6 +171,7 @@
" futr_exog_list=futr_exog_list,\n",
" hist_exog_list=hist_exog_list,\n",
" stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -165,6 +180,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
@@ -223,15 +242,9 @@
" x = torch.relu(layer(x))\n",
" x = self.out(x)\n",
" \n",
- " x = x.reshape(batch_size, self.h, -1)\n",
- " forecast = self.loss.domain_map(x)\n",
+ " forecast = x.reshape(batch_size, self.h, -1)\n",
"\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.\n",
- " if forecast.ndim == 2:\n",
- " return forecast.unsqueeze(-1)\n",
- " else:\n",
- " return forecast"
+ " return forecast"
]
},
{
@@ -267,76 +280,17 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "1bf909e1",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "import logging\n",
- "import warnings\n",
- "\n",
- "from neuralforecast import NeuralForecast\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE, MSE, RMSE, MAPE, SMAPE, MASE, relMSE, QuantileLoss, MQLoss, DistributionLoss,PMM, GMM, NBMM, HuberLoss, TukeyLoss, HuberQLoss, HuberMQLoss"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "f7ee8d15",
+ "id": "6c22db80",
"metadata": {},
"outputs": [],
"source": [
"#| hide\n",
- "# Test losses\n",
+ "# Unit tests for models\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "\n",
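- "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 132 train\n",
- "Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",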
- "\n",
- "AirPassengersStatic_single = AirPassengersStatic[AirPassengersStatic[\"unique_id\"] == 'Airline1']\n",
- "Y_train_df_single = Y_train_df[Y_train_df[\"unique_id\"] == 'Airline1']\n",
- "Y_test_df_single = Y_test_df[Y_test_df[\"unique_id\"] == 'Airline1']\n",
- "\n",
- "losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "valid_losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "\n",
- "for loss, valid_loss in zip(losses, valid_losses):\n",
- " try:\n",
- " model = MLPMultivariate(h=12, \n",
- " input_size=24,\n",
- " n_series=2,\n",
- " loss = loss,\n",
- " valid_loss = valid_loss,\n",
- " scaler_type='robust',\n",
- " learning_rate=1e-3,\n",
- " max_steps=2,\n",
- " val_check_steps=10,\n",
- " early_stop_patience_steps=2,\n",
- " )\n",
- "\n",
- " fcst = NeuralForecast(models=[model], freq='M')\n",
- " fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
- " forecasts = fcst.predict(futr_df=Y_test_df)\n",
- " except Exception as e:\n",
- " assert str(e) == f\"{loss} is not supported in a Multivariate model.\"\n",
- "\n",
- "\n",
- "# Test n_series = 1\n",
- "model = MLPMultivariate(h=12, \n",
- " input_size=24,\n",
- " n_series=1,\n",
- " loss = MAE(),\n",
- " scaler_type='robust',\n",
- " learning_rate=1e-3,\n",
- " max_steps=2,\n",
- " val_check_steps=10,\n",
- " early_stop_patience_steps=2,\n",
- " )\n",
- "fcst = NeuralForecast(models=[model], freq='M')\n",
- "fcst.fit(df=Y_train_df_single, static_df=AirPassengersStatic_single, val_size=12)\n",
- "forecasts = fcst.predict(futr_df=Y_test_df_single) "
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(MLPMultivariate, [\"airpassengers\"])"
]
},
{
@@ -374,6 +328,7 @@
" loss = MAE(),\n",
" scaler_type='robust',\n",
" learning_rate=1e-3,\n",
+ " stat_exog_list=['airline1'],\n",
" max_steps=200,\n",
" val_check_steps=10,\n",
" early_stop_patience_steps=2)\n",
@@ -385,6 +340,7 @@
"fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
"forecasts = fcst.predict(futr_df=Y_test_df)\n",
"\n",
+ "# Plot predictions\n",
"Y_hat_df = forecasts.reset_index(drop=False).drop(columns=['unique_id','ds'])\n",
"plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n",
"plot_df = pd.concat([Y_train_df, plot_df])\n",
diff --git a/nbs/models.nbeats.ipynb b/nbs/models.nbeats.ipynb
index 9504770d5..be1c8a93a 100644
--- a/nbs/models.nbeats.ipynb
+++ b/nbs/models.nbeats.ipynb
@@ -66,7 +66,7 @@
"import torch.nn as nn\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_windows import BaseWindows"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -77,9 +77,12 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
"from nbdev.showdoc import show_doc\n",
"from neuralforecast.utils import generate_series\n",
+ "from neuralforecast.common._model_checks import check_model\n",
"\n",
"import matplotlib.pyplot as plt"
]
@@ -231,7 +234,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class NBEATS(BaseWindows):\n",
+ "class NBEATS(BaseModel):\n",
" \"\"\" NBEATS\n",
"\n",
" The Neural Basis Expansion Analysis for Time Series (NBEATS), is a simple and yet\n",
@@ -282,10 +285,11 @@
" \"N-BEATS: Neural basis expansion analysis for interpretable time series forecasting\".](https://arxiv.org/abs/1905.10437)\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
" \n",
" def __init__(self,\n",
" h,\n",
@@ -420,8 +424,8 @@
" def forward(self, windows_batch):\n",
" \n",
" # Parse windows_batch\n",
- " insample_y = windows_batch['insample_y']\n",
- " insample_mask = windows_batch['insample_mask']\n",
+ " insample_y = windows_batch['insample_y'].squeeze(-1)\n",
+ " insample_mask = windows_batch['insample_mask'].squeeze(-1)\n",
"\n",
" # NBEATS' forward\n",
" residuals = insample_y.flip(dims=(-1,)) # backcast init\n",
@@ -435,10 +439,7 @@
" forecast = forecast + block_forecast\n",
"\n",
" if self.decompose_forecast:\n",
- " block_forecasts.append(block_forecast)\n",
- "\n",
- " # Adapting output's domain\n",
- " forecast = self.loss.domain_map(forecast) \n",
+ " block_forecasts.append(block_forecast) \n",
"\n",
" if self.decompose_forecast:\n",
" # (n_batch, n_blocks, h, out_features)\n",
@@ -480,6 +481,22 @@
"show_doc(NBEATS.predict, name='NBEATS.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8de78f60",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(NBEATS, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
diff --git a/nbs/models.nbeatsx.ipynb b/nbs/models.nbeatsx.ipynb
index 9952c3cf9..aaba3b760 100644
--- a/nbs/models.nbeatsx.ipynb
+++ b/nbs/models.nbeatsx.ipynb
@@ -62,7 +62,8 @@
"\n",
"from fastcore.test import test_eq, test_fail\n",
"from nbdev.showdoc import show_doc\n",
- "from neuralforecast.utils import generate_series"
+ "from neuralforecast.utils import generate_series\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -80,7 +81,7 @@
"import torch.nn as nn\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_windows import BaseWindows"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -373,7 +374,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class NBEATSx(BaseWindows):\n",
+ "class NBEATSx(BaseModel):\n",
" \"\"\"NBEATSx\n",
"\n",
" The Neural Basis Expansion Analysis with Exogenous variables (NBEATSx) is a simple\n",
@@ -427,10 +428,11 @@
" \"\"\"\n",
"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = \"windows\"\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(\n",
" self,\n",
@@ -612,8 +614,8 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
- " insample_y = windows_batch[\"insample_y\"]\n",
- " insample_mask = windows_batch[\"insample_mask\"]\n",
+ " insample_y = windows_batch[\"insample_y\"].squeeze(-1)\n",
+ " insample_mask = windows_batch[\"insample_mask\"].squeeze(-1)\n",
" futr_exog = windows_batch[\"futr_exog\"]\n",
" hist_exog = windows_batch[\"hist_exog\"]\n",
" stat_exog = windows_batch[\"stat_exog\"]\n",
@@ -637,9 +639,6 @@
" if self.decompose_forecast:\n",
" block_forecasts.append(block_forecast)\n",
"\n",
- " # Adapting output's domain\n",
- " forecast = self.loss.domain_map(forecast)\n",
- "\n",
" if self.decompose_forecast:\n",
" # (n_batch, n_blocks, h)\n",
" block_forecasts = torch.stack(block_forecasts)\n",
@@ -680,6 +679,22 @@
"show_doc(NBEATSx.predict, name='NBEATSx.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ce8cba7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(NBEATSx, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -806,7 +821,7 @@
"# test seasonality/trend basis protection\n",
"test_fail(NBEATSx.__init__, \n",
" contains='Horizon `h=1` incompatible with `seasonality` or `trend` in stacks',\n",
- " kwargs=dict(self=BaseWindows, h=1, input_size=4))"
+ " kwargs=dict(self=BaseModel, h=1, input_size=4))"
]
},
{
diff --git a/nbs/models.nhits.ipynb b/nbs/models.nhits.ipynb
index e844f4660..98da310c1 100644
--- a/nbs/models.nhits.ipynb
+++ b/nbs/models.nhits.ipynb
@@ -67,7 +67,7 @@
"import torch.nn.functional as F\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_windows import BaseWindows"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -83,7 +83,8 @@
"import matplotlib.pyplot as plt\n",
"from fastcore.test import test_eq\n",
"from nbdev.showdoc import show_doc\n",
- "from neuralforecast.utils import generate_series"
+ "from neuralforecast.utils import generate_series\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -261,7 +262,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class NHITS(BaseWindows):\n",
+ "class NHITS(BaseModel):\n",
" \"\"\" NHITS\n",
"\n",
" The Neural Hierarchical Interpolation for Time Series (NHITS), is an MLP-based deep\n",
@@ -316,10 +317,11 @@
" Accepted at the Thirty-Seventh AAAI Conference on Artificial Intelligence.](https://arxiv.org/abs/2201.12886)\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self, \n",
" h,\n",
@@ -455,8 +457,8 @@
" def forward(self, windows_batch):\n",
" \n",
" # Parse windows_batch\n",
- " insample_y = windows_batch['insample_y']\n",
- " insample_mask = windows_batch['insample_mask']\n",
+ " insample_y = windows_batch['insample_y'].squeeze(-1).contiguous()\n",
+ " insample_mask = windows_batch['insample_mask'].squeeze(-1).contiguous()\n",
" futr_exog = windows_batch['futr_exog']\n",
" hist_exog = windows_batch['hist_exog']\n",
" stat_exog = windows_batch['stat_exog']\n",
@@ -476,9 +478,6 @@
" if self.decompose_forecast:\n",
" block_forecasts.append(block_forecast)\n",
" \n",
- " # Adapting output's domain\n",
- " forecast = self.loss.domain_map(forecast)\n",
- "\n",
" if self.decompose_forecast:\n",
" # (n_batch, n_blocks, h, output_size)\n",
" block_forecasts = torch.stack(block_forecasts)\n",
@@ -516,6 +515,21 @@
"show_doc(NHITS.predict, name='NHITS.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(NHITS, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -611,7 +625,6 @@
"from neuralforecast.losses.pytorch import DistributionLoss\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
- "\n",
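"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",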
"\n",
diff --git a/nbs/models.nlinear.ipynb b/nbs/models.nlinear.ipynb
index b55d42204..fc67b409a 100644
--- a/nbs/models.nlinear.ipynb
+++ b/nbs/models.nlinear.ipynb
@@ -53,7 +53,7 @@
"\n",
"import torch.nn as nn\n",
"\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -65,8 +65,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -76,7 +79,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class NLinear(BaseWindows):\n",
+ "class NLinear(BaseModel):\n",
" \"\"\" NLinear\n",
"\n",
" *Parameters:*<br>\n",
@@ -113,10 +116,11 @@
"\t- Zeng, Ailing, et al. \"Are transformers effective for time series forecasting?.\" Proceedings of the AAAI conference on artificial intelligence. Vol. 37. No. 9. 2023.\"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -188,11 +192,7 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
- " insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
- " #futr_exog = windows_batch['futr_exog']\n",
+ " insample_y = windows_batch['insample_y'].squeeze(-1)\n",
"\n",
" # Parse inputs\n",
" batch_size = len(insample_y)\n",
@@ -204,7 +204,6 @@
" # Final\n",
" forecast = self.linear(norm_insample_y) + last_value\n",
" forecast = forecast.reshape(batch_size, self.h, self.loss.outputsize_multiplier)\n",
- " forecast = self.loss.domain_map(forecast)\n",
" return forecast"
]
},
@@ -235,6 +234,21 @@
"show_doc(NLinear.predict, name='NLinear.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(NLinear, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -254,7 +268,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import NLinear\n",
- "from neuralforecast.losses.pytorch import MQLoss, DistributionLoss\n",
+ "from neuralforecast.losses.pytorch import DistributionLoss\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, augment_calendar_df\n",
"\n",
"AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
@@ -264,8 +278,7 @@
"\n",
"model = NLinear(h=12,\n",
" input_size=24,\n",
- " loss=MAE(),\n",
- " #loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=True),\n",
+ " loss=DistributionLoss(distribution='StudentT', level=[80, 90], return_params=True),\n",
" scaler_type='robust',\n",
" learning_rate=1e-3,\n",
" max_steps=500,\n",
diff --git a/nbs/models.patchtst.ipynb b/nbs/models.patchtst.ipynb
index 31064cc24..bd6f2a35f 100644
--- a/nbs/models.patchtst.ipynb
+++ b/nbs/models.patchtst.ipynb
@@ -61,7 +61,7 @@
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import RevIN\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
@@ -74,8 +74,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -611,7 +614,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class PatchTST(BaseWindows):\n",
+ "class PatchTST(BaseModel):\n",
" \"\"\" PatchTST\n",
"\n",
" The PatchTST model is an efficient Transformer-based model for multivariate time series forecasting.\n",
@@ -673,10 +676,11 @@
" -[Nie, Y., Nguyen, N. H., Sinthong, P., & Kalagnanam, J. (2022). \"A Time Series is Worth 64 Words: Long-term Forecasting with Transformers\"](https://arxiv.org/pdf/2211.14730.pdf)\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -789,21 +793,11 @@
" def forward(self, windows_batch): # x: [batch, input_size]\n",
"\n",
" # Parse windows_batch\n",
- " insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
- " #futr_exog = windows_batch['futr_exog']\n",
- "\n",
- " # Add dimension for channel\n",
- " x = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
+ " x = windows_batch['insample_y']\n",
"\n",
" x = x.permute(0,2,1) # x: [Batch, 1, input_size]\n",
" x = self.model(x)\n",
- " x = x.reshape(x.shape[0], self.h, -1) # x: [Batch, h, c_out]\n",
- "\n",
- " # Domain map\n",
- " forecast = self.loss.domain_map(x)\n",
+ " forecast = x.reshape(x.shape[0], self.h, -1) # x: [Batch, h, c_out]\n",
" \n",
" return forecast"
]
@@ -835,6 +829,21 @@
"show_doc(PatchTST.predict, name='PatchTST.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(PatchTST, [\"airpassengers\"])"
+ ]
+ },
{
"attachments": {},
"cell_type": "markdown",
@@ -872,7 +881,6 @@
" n_heads=4,\n",
" scaler_type='robust',\n",
" loss=DistributionLoss(distribution='StudentT', level=[80, 90]),\n",
- " #loss=MAE(),\n",
" learning_rate=1e-3,\n",
" max_steps=500,\n",
" val_check_steps=50,\n",
diff --git a/nbs/models.rmok.ipynb b/nbs/models.rmok.ipynb
index 017477c13..96dd6e195 100644
--- a/nbs/models.rmok.ipynb
+++ b/nbs/models.rmok.ipynb
@@ -37,8 +37,8 @@
"# Reversible Mixture of KAN - RMoK\n",
"The Reversible Mixture of KAN (RMoK) is a KAN-based model for time series forecasting which uses a mixture-of-experts structure to assign variables to different KAN experts, such as WaveKAN, TaylorKAN and JacobiKAN.\n",
"\n",
- "**Reference**\n",
- "- [Xiao Han, Xinfeng Zhang, Yiling Wu, Zhenduo Zhang, Zhe Wu.\"KAN4TSF: Are KAN and KAN-based models Effective for Time Series Forecasting?\"](https://arxiv.org/abs/2408.11306)"
+ "**References**<br>\n",
+ "[Xiao Han, Xinfeng Zhang, Yiling Wu, Zhenduo Zhang, Zhe Wu.\"KAN4TSF: Are KAN and KAN-based models Effective for Time Series Forecasting?\"](https://arxiv.org/abs/2408.11306)<br>"
]
},
{
@@ -55,8 +55,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -73,8 +76,9 @@
"import torch.nn.functional as F\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate\n",
- "from neuralforecast.common._modules import RevIN"
+ "from neuralforecast.common._base_model import BaseModel\n",
+ "from neuralforecast.common._modules import RevINMultivariate\n",
+ "from typing import Optional"
]
},
{
@@ -331,9 +335,11 @@
"source": [
"#| export\n",
"\n",
- "class RMoK(BaseMultivariate):\n",
+ "class RMoK(BaseModel):\n",
" \"\"\" Reversible Mixture of KAN\n",
- " **Parameters**<br>\n",
+ " \n",
+ " \n",
+ " **Parameters:**<br>\n",
" `h`: int, Forecast horizon.<br>\n",
" `input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].<br>\n",
" `n_series`: int, number of time-series.<br>\n",
@@ -353,6 +359,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.<br>\n",
" `batch_size`: int=32, number of different series in each batch.<br>\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.<br>\n",
+ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
+ " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.<br>\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
" `step_size`: int=1, step size between each window of temporal data.<br>\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -366,20 +376,21 @@
" `dataloader_kwargs`: dict, optional, list of parameters passed into the PyTorch Lightning dataloader by the `TimeSeriesDataLoader`.<br>\n",
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
"\n",
- " Reference<br>\n",
- " [Xiao Han, Xinfeng Zhang, Yiling Wu, Zhenduo Zhang, Zhe Wu.\"KAN4TSF: Are KAN and KAN-based models Effective for Time Series Forecasting?\"](https://arxiv.org/abs/2408.11306)\n",
+ " **References**<br>\n",
+ " - [Xiao Han, Xinfeng Zhang, Yiling Wu, Zhenduo Zhang, Zhe Wu.\"KAN4TSF: Are KAN and KAN-based models Effective for Time Series Forecasting?\". arXiv.](https://arxiv.org/abs/2408.11306)<br>\n",
" \"\"\"\n",
"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
" input_size,\n",
- " n_series,\n",
+ " n_series: int,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
@@ -396,6 +407,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 1024,\n",
+ " inference_windows_batch_size = 1024,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -422,6 +437,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -445,35 +464,31 @@
" self.wavelet_function = wavelet_function\n",
"\n",
" self.experts = nn.ModuleList([\n",
- " TaylorKANLayer(self.input_size, self.h, order=self.taylor_order, addbias=True),\n",
- " JacobiKANLayer(self.input_size, self.h, degree=self.jacobi_degree),\n",
- " WaveKANLayer(self.input_size, self.h, wavelet_type=self.wavelet_function),\n",
- " nn.Linear(self.input_size, self.h),\n",
+ " TaylorKANLayer(self.input_size, self.h * self.loss.outputsize_multiplier, order=self.taylor_order, addbias=True),\n",
+ " JacobiKANLayer(self.input_size, self.h * self.loss.outputsize_multiplier, degree=self.jacobi_degree),\n",
+ " WaveKANLayer(self.input_size, self.h * self.loss.outputsize_multiplier, wavelet_type=self.wavelet_function),\n",
+ " nn.Linear(self.input_size, self.h * self.loss.outputsize_multiplier),\n",
" ])\n",
" \n",
" self.num_experts = len(self.experts)\n",
" self.gate = nn.Linear(self.input_size, self.num_experts)\n",
" self.softmax = nn.Softmax(dim=-1)\n",
- " self.rev = RevIN(self.n_series, affine=self.revin_affine)\n",
+ " self.rev = RevINMultivariate(self.n_series, affine=self.revin_affine)\n",
"\n",
" def forward(self, windows_batch):\n",
" insample_y = windows_batch['insample_y']\n",
" B, L, N = insample_y.shape\n",
- " x = self.rev(insample_y, 'norm') if self.rev else insample_y\n",
+ " x = self.rev(insample_y, 'norm')\n",
" x = self.dropout(x).transpose(1, 2).reshape(B * N, L)\n",
"\n",
" score = F.softmax(self.gate(x), dim=-1)\n",
" expert_outputs = torch.stack([self.experts[i](x) for i in range(self.num_experts)], dim=-1)\n",
"\n",
- " y_pred = torch.einsum(\"BLE,BE->BL\", expert_outputs, score).reshape(B, N, -1).permute(0, 2, 1)\n",
+ " y_pred = torch.einsum(\"BLE, BE -> BL\", expert_outputs, score).reshape(B, N, self.h * self.loss.outputsize_multiplier).permute(0, 2, 1)\n",
" y_pred = self.rev(y_pred, 'denorm')\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
+ " y_pred = y_pred.reshape(B, self.h, -1)\n",
"\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " if y_pred.ndim == 2:\n",
- " return y_pred.unsqueeze(-1)\n",
- " else:\n",
- " return y_pred"
+ " return y_pred"
]
},
{
@@ -503,6 +518,21 @@
"show_doc(RMoK.predict, name='RMoK.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(RMoK, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -560,13 +590,6 @@
"ax.legend(prop={'size': 15})\n",
"ax.grid()"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
diff --git a/nbs/models.rnn.ipynb b/nbs/models.rnn.ipynb
index f5e1a67b9..8a92fdfb2 100644
--- a/nbs/models.rnn.ipynb
+++ b/nbs/models.rnn.ipynb
@@ -61,8 +61,10 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "from fastcore.test import test_eq\n",
"from nbdev.showdoc import show_doc\n",
- "from neuralforecast.utils import generate_series"
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -76,9 +78,10 @@
"\n",
"import torch\n",
"import torch.nn as nn\n",
+ "import warnings\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_recurrent import BaseRecurrent\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import MLP"
]
},
@@ -89,7 +92,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class RNN(BaseRecurrent):\n",
+ "class RNN(BaseModel):\n",
" \"\"\" RNN\n",
"\n",
" Multi Layer Elman RNN (RNN), with MLP decoder.\n",
@@ -106,7 +109,7 @@
" `encoder_activation`: str=`tanh`, type of RNN activation from `tanh` or `relu`.<br>\n",
" `encoder_bias`: bool=True, whether or not to use biases b_ih, b_hh within RNN units.<br>\n",
" `encoder_dropout`: float=0., dropout regularization applied to RNN outputs.<br>\n",
- " `context_size`: int=10, size of context vector for each timestamp on the forecasting window.<br>\n",
+ " `context_size`: deprecated.<br>\n",
" `decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.<br>\n",
" `decoder_layers`: int=2, number of layers for the MLP decoder.<br>\n",
" `futr_exog_list`: str list, future exogenous columns.<br>\n",
@@ -135,26 +138,29 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'recurrent'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = True # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int,\n",
" input_size: int = -1,\n",
" inference_input_size: int = -1,\n",
" encoder_n_layers: int = 2,\n",
- " encoder_hidden_size: int = 200,\n",
+ " encoder_hidden_size: int = 128,\n",
" encoder_activation: str = 'tanh',\n",
" encoder_bias: bool = True,\n",
" encoder_dropout: float = 0.,\n",
- " context_size: int = 10,\n",
- " decoder_hidden_size: int = 200,\n",
+ " context_size: Optional[int] = None,\n",
+ " decoder_hidden_size: int = 128,\n",
" decoder_layers: int = 2,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
+ " recurrent = False,\n",
" loss = MAE(),\n",
" valid_loss = None,\n",
" max_steps: int = 1000,\n",
@@ -164,6 +170,10 @@
" val_check_steps: int = 100,\n",
" batch_size=32,\n",
" valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 128,\n",
+ " inference_windows_batch_size = 1024,\n",
+ " start_padding_enabled = False,\n",
+ " step_size: int = 1,\n",
" scaler_type: str='robust',\n",
" random_seed=1,\n",
" num_workers_loader=0,\n",
@@ -174,10 +184,16 @@
" lr_scheduler_kwargs = None, \n",
" dataloader_kwargs = None, \n",
" **trainer_kwargs):\n",
+ " \n",
+ " self.RECURRENT = recurrent\n",
+ "\n",
" super(RNN, self).__init__(\n",
" h=h,\n",
" input_size=input_size,\n",
- " inference_input_size=inference_input_size,\n",
+ " futr_exog_list=futr_exog_list,\n",
+ " hist_exog_list=hist_exog_list,\n",
+ " stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -187,13 +203,14 @@
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
" valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
+ " step_size=step_size,\n",
" scaler_type=scaler_type,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
+ " random_seed=random_seed,\n",
" num_workers_loader=num_workers_loader,\n",
" drop_last_loader=drop_last_loader,\n",
- " random_seed=random_seed,\n",
" optimizer=optimizer,\n",
" optimizer_kwargs=optimizer_kwargs,\n",
" lr_scheduler=lr_scheduler,\n",
@@ -208,7 +225,11 @@
" self.encoder_activation = encoder_activation\n",
" self.encoder_bias = encoder_bias\n",
" self.encoder_dropout = encoder_dropout\n",
- " \n",
+ "\n",
+ " # Context adapter\n",
+ " if context_size is not None:\n",
+ " warnings.warn(\"context_size is deprecated and will be removed in future versions.\")\n",
+ "\n",
" # Context adapter\n",
" self.context_size = context_size\n",
"\n",
@@ -217,69 +238,74 @@
" self.decoder_layers = decoder_layers\n",
"\n",
" # RNN input size (1 for target variable y)\n",
- " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size\n",
+ " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size\n",
"\n",
" # Instantiate model\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = False\n",
" self.hist_encoder = nn.RNN(input_size=input_encoder,\n",
- " hidden_size=self.encoder_hidden_size,\n",
- " num_layers=self.encoder_n_layers,\n",
- " nonlinearity=self.encoder_activation,\n",
- " bias=self.encoder_bias,\n",
- " dropout=self.encoder_dropout,\n",
- " batch_first=True)\n",
- "\n",
- " # Context adapter\n",
- " self.context_adapter = nn.Linear(in_features=self.encoder_hidden_size + self.futr_exog_size * h,\n",
- " out_features=self.context_size * h)\n",
+ " hidden_size=self.encoder_hidden_size,\n",
+ " num_layers=self.encoder_n_layers,\n",
+ " bias=self.encoder_bias,\n",
+ " dropout=self.encoder_dropout,\n",
+ " batch_first=True)\n",
"\n",
" # Decoder MLP\n",
- " self.mlp_decoder = MLP(in_features=self.context_size + self.futr_exog_size,\n",
- " out_features=self.loss.outputsize_multiplier,\n",
- " hidden_size=self.decoder_hidden_size,\n",
- " num_layers=self.decoder_layers,\n",
- " activation='ReLU',\n",
- " dropout=0.0)\n",
+ " if self.RECURRENT:\n",
+ " self.proj = nn.Linear(self.encoder_hidden_size, self.loss.outputsize_multiplier)\n",
+ " else:\n",
+ " self.mlp_decoder = MLP(in_features=self.encoder_hidden_size + self.futr_exog_size,\n",
+ " out_features=self.loss.outputsize_multiplier,\n",
+ " hidden_size=self.decoder_hidden_size,\n",
+ " num_layers=self.decoder_layers,\n",
+ " activation='ReLU',\n",
+ " dropout=0.0)\n",
"\n",
" def forward(self, windows_batch):\n",
" \n",
" # Parse windows_batch\n",
- " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
- " futr_exog = windows_batch['futr_exog']\n",
- " hist_exog = windows_batch['hist_exog']\n",
- " stat_exog = windows_batch['stat_exog']\n",
+ " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
+ " futr_exog = windows_batch['futr_exog'] # [B, seq_len, F]\n",
+ " hist_exog = windows_batch['hist_exog'] # [B, seq_len, X]\n",
+ " stat_exog = windows_batch['stat_exog'] # [B, S]\n",
"\n",
- " # Concatenate y, historic and static inputs\n",
- " # [B, C, seq_len, 1] -> [B, seq_len, C]\n",
- " # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]\n",
+ " # Concatenate y, historic and static inputs \n",
" batch_size, seq_len = encoder_input.shape[:2]\n",
" if self.hist_exog_size > 0:\n",
- " hist_exog = hist_exog.permute(0,2,1,3).squeeze(-1) # [B, X, seq_len, 1] -> [B, seq_len, X]\n",
- " encoder_input = torch.cat((encoder_input, hist_exog), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, hist_exog), dim=2) # [B, seq_len, 1] + [B, seq_len, X] -> [B, seq_len, 1 + X]\n",
"\n",
" if self.stat_exog_size > 0:\n",
- " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
- " encoder_input = torch.cat((encoder_input, stat_exog), dim=2)\n",
- "\n",
- " # RNN forward\n",
- " hidden_state, _ = self.hist_encoder(encoder_input) # [B, seq_len, rnn_hidden_state]\n",
+ " # print(encoder_input.shape)\n",
+ " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
+ " encoder_input = torch.cat((encoder_input, stat_exog), dim=2) # [B, seq_len, 1 + X] + [B, seq_len, S] -> [B, seq_len, 1 + X + S]\n",
"\n",
" if self.futr_exog_size > 0:\n",
- " futr_exog = futr_exog.permute(0,2,3,1)[:,:,1:,:] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]\n",
- " hidden_state = torch.cat(( hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, \n",
+ " futr_exog[:, :seq_len]), dim=2) # [B, seq_len, 1 + X + S] + [B, seq_len, F] -> [B, seq_len, 1 + X + S + F]\n",
"\n",
- " # Context adapter\n",
- " context = self.context_adapter(hidden_state)\n",
- " context = context.reshape(batch_size, seq_len, self.h, self.context_size)\n",
+ " if self.RECURRENT:\n",
+ " if self.maintain_state:\n",
+ " rnn_state = self.rnn_state\n",
+ " else:\n",
+ " rnn_state = None\n",
+ " \n",
+ " output, rnn_state = self.hist_encoder(encoder_input, \n",
+ " rnn_state) # [B, seq_len, rnn_hidden_state]\n",
+ " output = self.proj(output) # [B, seq_len, rnn_hidden_state] -> [B, seq_len, n_output]\n",
+ " if self.maintain_state:\n",
+ " self.rnn_state = rnn_state\n",
+ " else:\n",
+ " hidden_state, _ = self.hist_encoder(encoder_input, None) # [B, seq_len, rnn_hidden_state]\n",
+ " hidden_state = hidden_state[:, -self.h:] # [B, seq_len, rnn_hidden_state] -> [B, h, rnn_hidden_state]\n",
+ " \n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_futr = futr_exog[:, -self.h:] # [B, h, F]\n",
+ " hidden_state = torch.cat((hidden_state, \n",
+ " futr_exog_futr), dim=-1) # [B, h, rnn_hidden_state] + [B, h, F] -> [B, h, rnn_hidden_state + F]\n",
"\n",
- " # Residual connection with futr_exog\n",
- " if self.futr_exog_size > 0:\n",
- " context = torch.cat((context, futr_exog), dim=-1)\n",
+ " output = self.mlp_decoder(hidden_state) # [B, h, rnn_hidden_state + F] -> [B, h, n_output]\n",
"\n",
- " # Final forecast\n",
- " output = self.mlp_decoder(context)\n",
- " output = self.loss.domain_map(output)\n",
- " \n",
- " return output"
+ " return output[:, -self.h:]"
]
},
{
@@ -309,6 +335,21 @@
"show_doc(RNN.predict, name='RNN.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(RNN, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -328,26 +369,24 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import RNN\n",
- "from neuralforecast.losses.pytorch import MQLoss, DistributionLoss\n",
+ "from neuralforecast.losses.pytorch import MQLoss\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
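- "\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",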
"\n",
"fcst = NeuralForecast(\n",
" models=[RNN(h=12,\n",
- " input_size=-1,\n",
+ " input_size=24,\n",
" inference_input_size=24,\n",
" loss=MQLoss(level=[80, 90]),\n",
- " scaler_type='robust',\n",
+ " valid_loss=MQLoss(level=[80, 90]),\n",
+ " scaler_type='standard',\n",
" encoder_n_layers=2,\n",
" encoder_hidden_size=128,\n",
- " context_size=10,\n",
" decoder_hidden_size=128,\n",
" decoder_layers=2,\n",
- " max_steps=300,\n",
+ " max_steps=200,\n",
" futr_exog_list=['y_[lag12]'],\n",
- " #hist_exog_list=['y_[lag12]'],\n",
" stat_exog_list=['airline1'],\n",
" )\n",
" ],\n",
diff --git a/nbs/models.softs.ipynb b/nbs/models.softs.ipynb
index 978f3c2c2..588bd8dcb 100644
--- a/nbs/models.softs.ipynb
+++ b/nbs/models.softs.ipynb
@@ -27,8 +27,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -57,8 +60,9 @@
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import TransEncoder, TransEncoderLayer"
]
},
@@ -134,7 +138,7 @@
"\n",
" # stochastic pooling\n",
" if self.training:\n",
- " ratio = F.softmax(combined_mean, dim=1)\n",
+ " ratio = F.softmax(torch.nan_to_num(combined_mean), dim=1)\n",
" ratio = ratio.permute(0, 2, 1)\n",
" ratio = ratio.reshape(-1, channels)\n",
" indices = torch.multinomial(ratio, 1)\n",
@@ -169,7 +173,7 @@
"source": [
"#| export\n",
"\n",
- "class SOFTS(BaseMultivariate):\n",
+ "class SOFTS(BaseModel):\n",
"\n",
" \"\"\" SOFTS\n",
" \n",
@@ -194,6 +198,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.<br>\n",
" `batch_size`: int=32, number of different series in each batch.<br>\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.<br>\n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, default uses all.<br>\n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.<br>\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
" `step_size`: int=1, step size between each window of temporal data.<br>\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -212,10 +220,11 @@
" \"\"\"\n",
"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = True\n",
+ " RECURRENT = False\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -224,6 +233,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" hidden_size: int = 512,\n",
" d_core: int = 512,\n",
" e_layers: int = 2,\n",
@@ -238,6 +248,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -256,6 +270,7 @@
" stat_exog_list = None,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -264,6 +279,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -299,7 +318,7 @@
" ]\n",
" )\n",
"\n",
- " self.projection = nn.Linear(hidden_size, self.h, bias=True)\n",
+ " self.projection = nn.Linear(hidden_size, self.h * self.loss.outputsize_multiplier, bias=True)\n",
"\n",
" def forecast(self, x_enc):\n",
" # Normalization from Non-stationary Transformer\n",
@@ -316,22 +335,19 @@
"\n",
" # De-Normalization from Non-stationary Transformer\n",
" if self.use_norm:\n",
- " dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))\n",
- " dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))\n",
+ " dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h * self.loss.outputsize_multiplier, 1))\n",
+ " dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h * self.loss.outputsize_multiplier, 1))\n",
" return dec_out\n",
" \n",
" def forward(self, windows_batch):\n",
" insample_y = windows_batch['insample_y']\n",
"\n",
" y_pred = self.forecast(insample_y)\n",
- " y_pred = y_pred[:, -self.h:, :]\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
+ " y_pred = y_pred.reshape(insample_y.shape[0],\n",
+ " self.h,\n",
+ " -1)\n",
"\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " if y_pred.ndim == 2:\n",
- " return y_pred.unsqueeze(-1)\n",
- " else:\n",
- " return y_pred"
+ " return y_pred"
]
},
{
@@ -361,6 +377,21 @@
"show_doc(SOFTS.predict, name='SOFTS.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(SOFTS, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -381,9 +412,7 @@
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import SOFTS\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MSE\n",
- "\n",
- "\n",
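+ "from neuralforecast.losses.pytorch import MASE\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",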
"\n",
@@ -396,8 +425,7 @@
" d_ff=64,\n",
" dropout=0.1,\n",
" use_norm=True,\n",
- " loss=MSE(),\n",
- " valid_loss=MAE(),\n",
+ " loss=MASE(seasonality=4),\n",
" early_stop_patience_steps=3,\n",
" batch_size=32)\n",
"\n",
diff --git a/nbs/models.stemgnn.ipynb b/nbs/models.stemgnn.ipynb
index b2222fc1c..1e97e9bca 100644
--- a/nbs/models.stemgnn.ipynb
+++ b/nbs/models.stemgnn.ipynb
@@ -53,8 +53,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -68,8 +71,9 @@
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -171,7 +175,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class StemGNN(BaseMultivariate):\n",
+ "class StemGNN(BaseModel):\n",
" \"\"\" StemGNN\n",
"\n",
" The Spectral Temporal Graph Neural Network (`StemGNN`) is a Graph-based multivariate\n",
@@ -198,6 +202,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.<br>\n",
" `batch_size`: int, number of windows in each batch.<br>\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.<br>\n",
+ " `windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.<br>\n",
+ " `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.<br>\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
" `step_size`: int=1, step size between each window of temporal data.<br>\n",
" `scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
" `random_seed`: int, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -212,10 +220,11 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False \n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
" \n",
" def __init__(self,\n",
" h,\n",
@@ -224,6 +233,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" n_stacks = 2,\n",
" multi_layer: int = 5,\n",
" dropout_rate: float = 0.5,\n",
@@ -236,6 +246,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 1024,\n",
+ " inference_windows_batch_size = 1024,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'robust',\n",
" random_seed: int = 1,\n",
@@ -254,7 +268,8 @@
" n_series=n_series,\n",
" futr_exog_list=futr_exog_list,\n",
" hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list, \n",
+ " stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y, \n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -263,6 +278,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" num_workers_loader=num_workers_loader,\n",
@@ -379,14 +398,8 @@
"\n",
" forecast = forecast.permute(0, 2, 1).contiguous()\n",
" forecast = forecast.reshape(batch_size, self.h, self.loss.outputsize_multiplier * self.n_series)\n",
- " forecast = self.loss.domain_map(forecast)\n",
"\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.\n",
- " if forecast.ndim == 2:\n",
- " return forecast.unsqueeze(-1)\n",
- " else:\n",
- " return forecast"
+ " return forecast"
]
},
{
@@ -423,73 +436,12 @@
"outputs": [],
"source": [
"#| hide\n",
- "import logging\n",
- "import warnings\n",
- "\n",
- "from neuralforecast import NeuralForecast\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE, MSE, RMSE, MAPE, SMAPE, MASE, relMSE, QuantileLoss, MQLoss, DistributionLoss,PMM, GMM, NBMM, HuberLoss, TukeyLoss, HuberQLoss, HuberMQLoss"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# Test losses\n",
+ "# Unit tests for models\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "\n",
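- "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
- "Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",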
- "\n",
- "AirPassengersStatic_single = AirPassengersStatic[AirPassengersStatic[\"unique_id\"] == 'Airline1']\n",
- "Y_train_df_single = Y_train_df[Y_train_df[\"unique_id\"] == 'Airline1']\n",
- "Y_test_df_single = Y_test_df[Y_test_df[\"unique_id\"] == 'Airline1']\n",
- "\n",
- "losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "valid_losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "\n",
- "for loss, valid_loss in zip(losses, valid_losses):\n",
- " try:\n",
- " model = StemGNN(h=12,\n",
- " input_size=24,\n",
- " n_series=2,\n",
- " scaler_type='robust',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=10,\n",
- " learning_rate=1e-3,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " batch_size=32\n",
- " )\n",
- "\n",
- " fcst = NeuralForecast(models=[model], freq='M')\n",
- " fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
- " forecasts = fcst.predict(futr_df=Y_test_df)\n",
- " except Exception as e:\n",
- " assert str(e) == f\"{loss} is not supported in a Multivariate model.\"\n",
- "\n",
- "\n",
- "# Test n_series = 1\n",
- "model = StemGNN(h=12,\n",
- " input_size=24,\n",
- " n_series=1,\n",
- " scaler_type='robust',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=10,\n",
- " learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " batch_size=32\n",
- " )\n",
- "fcst = NeuralForecast(models=[model], freq='M')\n",
- "fcst.fit(df=Y_train_df_single, static_df=AirPassengersStatic_single, val_size=12)\n",
- "forecasts = fcst.predict(futr_df=Y_test_df_single) "
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(StemGNN, [\"airpassengers\"])"
]
},
{
@@ -527,13 +479,13 @@
"model = StemGNN(h=12,\n",
" input_size=24,\n",
" n_series=2,\n",
- " scaler_type='robust',\n",
- " max_steps=100,\n",
+ " scaler_type='standard',\n",
+ " max_steps=500,\n",
" early_stop_patience_steps=-1,\n",
" val_check_steps=10,\n",
" learning_rate=1e-3,\n",
" loss=MAE(),\n",
- " valid_loss=None,\n",
+ " valid_loss=MAE(),\n",
" batch_size=32\n",
" )\n",
"\n",
diff --git a/nbs/models.tcn.ipynb b/nbs/models.tcn.ipynb
index dee324513..61551f1f5 100644
--- a/nbs/models.tcn.ipynb
+++ b/nbs/models.tcn.ipynb
@@ -69,7 +69,7 @@
"import torch.nn as nn\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_recurrent import BaseRecurrent\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import MLP, TemporalConvolutionEncoder"
]
},
@@ -80,10 +80,11 @@
"outputs": [],
"source": [
"#| hide\n",
- "from nbdev.showdoc import show_doc\n",
- "\n",
"import logging\n",
- "import warnings"
+ "import warnings\n",
+ "from fastcore.test import test_eq\n",
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -93,7 +94,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TCN(BaseRecurrent):\n",
+ "class TCN(BaseModel):\n",
" \"\"\" TCN\n",
"\n",
" Temporal Convolution Network (TCN), with MLP decoder.\n",
@@ -134,21 +135,22 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'recurrent'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True \n",
- " \n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False) \n",
+ "\n",
" def __init__(self,\n",
" h: int,\n",
" input_size: int = -1,\n",
" inference_input_size: int = -1,\n",
" kernel_size: int = 2,\n",
" dilations: List[int] = [1, 2, 4, 8, 16],\n",
- " encoder_hidden_size: int = 200,\n",
+ " encoder_hidden_size: int = 128,\n",
" encoder_activation: str = 'ReLU',\n",
" context_size: int = 10,\n",
- " decoder_hidden_size: int = 200,\n",
+ " decoder_hidden_size: int = 128,\n",
" decoder_layers: int = 2,\n",
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
@@ -162,6 +164,10 @@
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
" valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 128,\n",
+ " inference_windows_batch_size = 1024,\n",
+ " start_padding_enabled = False,\n",
+ " step_size: int = 1, \n",
" scaler_type: str ='robust',\n",
" random_seed: int = 1,\n",
" num_workers_loader = 0,\n",
@@ -185,6 +191,10 @@
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
" valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
+ " step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" futr_exog_list=futr_exog_list,\n",
" hist_exog_list=hist_exog_list,\n",
@@ -215,7 +225,7 @@
" self.decoder_layers = decoder_layers\n",
"\n",
" # TCN input size (1 for target variable y)\n",
- " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size\n",
+ " input_encoder = 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size\n",
"\n",
" \n",
" #---------------------------------- Instantiate Model -----------------------------------#\n",
@@ -228,11 +238,11 @@
" activation=self.encoder_activation)\n",
"\n",
" # Context adapter\n",
- " self.context_adapter = nn.Linear(in_features=self.encoder_hidden_size + self.futr_exog_size * h,\n",
- " out_features=self.context_size * h)\n",
+ " self.context_adapter = nn.Linear(in_features=self.input_size,\n",
+ " out_features=h)\n",
"\n",
" # Decoder MLP\n",
- " self.mlp_decoder = MLP(in_features=self.context_size + self.futr_exog_size,\n",
+ " self.mlp_decoder = MLP(in_features=self.encoder_hidden_size + self.futr_exog_size,\n",
" out_features=self.loss.outputsize_multiplier,\n",
" hidden_size=self.decoder_hidden_size,\n",
" num_layers=self.decoder_layers,\n",
@@ -242,41 +252,41 @@
" def forward(self, windows_batch):\n",
" \n",
" # Parse windows_batch\n",
- " encoder_input = windows_batch['insample_y'] # [B, seq_len, 1]\n",
- " futr_exog = windows_batch['futr_exog']\n",
- " hist_exog = windows_batch['hist_exog']\n",
- " stat_exog = windows_batch['stat_exog']\n",
+ " encoder_input = windows_batch['insample_y'] # [B, L, 1]\n",
+ " futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n",
+ " hist_exog = windows_batch['hist_exog'] # [B, L, X]\n",
+ " stat_exog = windows_batch['stat_exog'] # [B, S]\n",
"\n",
- " # Concatenate y, historic and static inputs\n",
- " # [B, C, seq_len, 1] -> [B, seq_len, C]\n",
- " # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]\n",
- " batch_size, seq_len = encoder_input.shape[:2]\n",
+ " # Concatenate y, historic and static inputs \n",
+ " batch_size, input_size = encoder_input.shape[:2]\n",
" if self.hist_exog_size > 0:\n",
- " hist_exog = hist_exog.permute(0,2,1,3).squeeze(-1) # [B, X, seq_len, 1] -> [B, seq_len, X]\n",
- " encoder_input = torch.cat((encoder_input, hist_exog), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, hist_exog), dim=2) # [B, L, 1] + [B, L, X] -> [B, L, 1 + X]\n",
"\n",
" if self.stat_exog_size > 0:\n",
- " stat_exog = stat_exog.unsqueeze(1).repeat(1, seq_len, 1) # [B, S] -> [B, seq_len, S]\n",
- " encoder_input = torch.cat((encoder_input, stat_exog), dim=2)\n",
- "\n",
- " # TCN forward\n",
- " hidden_state = self.hist_encoder(encoder_input) # [B, seq_len, tcn_hidden_state]\n",
+ " # print(encoder_input.shape)\n",
+ " stat_exog = stat_exog.unsqueeze(1).repeat(1, input_size, 1) # [B, S] -> [B, L, S]\n",
+ " encoder_input = torch.cat((encoder_input, stat_exog), dim=2) # [B, L, 1 + X] + [B, L, S] -> [B, L, 1 + X + S]\n",
"\n",
" if self.futr_exog_size > 0:\n",
- " futr_exog = futr_exog.permute(0,2,3,1)[:,:,1:,:] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]\n",
- " hidden_state = torch.cat(( hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2)\n",
+ " encoder_input = torch.cat((encoder_input, \n",
+ " futr_exog[:, :input_size]), dim=2) # [B, L, 1 + X + S] + [B, L, F] -> [B, L, 1 + X + S + F]\n",
+ "\n",
+ " # TCN forward \n",
+ " hidden_state = self.hist_encoder(encoder_input) # [B, L, C]\n",
"\n",
" # Context adapter\n",
- " context = self.context_adapter(hidden_state)\n",
- " context = context.reshape(batch_size, seq_len, self.h, self.context_size)\n",
+ " hidden_state = hidden_state.permute(0, 2, 1) # [B, L, C] -> [B, C, L]\n",
+ " context = self.context_adapter(hidden_state) # [B, C, L] -> [B, C, h]\n",
"\n",
" # Residual connection with futr_exog\n",
" if self.futr_exog_size > 0:\n",
- " context = torch.cat((context, futr_exog), dim=-1)\n",
+ " futr_exog_futr = futr_exog[:, input_size:].swapaxes(1, 2) # [B, L + h, F] -> [B, F, h] \n",
+ " context = torch.cat((context, futr_exog_futr), dim=1) # [B, C, h] + [B, F, h] = [B, C + F, h]\n",
+ "\n",
+ " context = context.swapaxes(1, 2) # [B, C + F, h] -> [B, h, C + F]\n",
"\n",
" # Final forecast\n",
- " output = self.mlp_decoder(context)\n",
- " output = self.loss.domain_map(output)\n",
+ " output = self.mlp_decoder(context) # [B, h, C + F] -> [B, h, n_output]\n",
" \n",
" return output"
]
@@ -308,13 +318,6 @@
"show_doc(TCN.predict, name='TCN.predict')"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Usage Example"
- ]
- },
{
"cell_type": "code",
"execution_count": null,
@@ -322,8 +325,19 @@
"outputs": [],
"source": [
"#| hide\n",
+ "# Unit tests for models\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
- "warnings.filterwarnings(\"ignore\")"
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TCN, [\"airpassengers\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Usage Example"
]
},
{
@@ -338,7 +352,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TCN\n",
- "from neuralforecast.losses.pytorch import GMM, MQLoss, DistributionLoss\n",
+ "from neuralforecast.losses.pytorch import DistributionLoss\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds [B, h, n_outputs]\n",
"\n",
- " # Map to output domain\n",
- " forecast = self.loss.domain_map(x + x_skip)\n",
+ " forecast = x + x_skip\n",
" \n",
" return forecast\n"
]
@@ -383,6 +386,21 @@
"show_doc(TiDE.predict, name='TiDE.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TiDE, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -402,7 +420,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TiDE\n",
- "from neuralforecast.losses.pytorch import GMM, DistributionLoss\n",
+ "from neuralforecast.losses.pytorch import GMM\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds 1:\n",
+ " raise Exception('TimeLLM only supports point loss functions (MAE, MSE, etc) as loss function.') \n",
+ " \n",
+ " if valid_loss is not None and not isinstance(valid_loss, losses.BasePointLoss):\n",
+ " raise Exception('TimeLLM only supports point loss functions (MAE, MSE, etc) as valid loss function.') \n",
+ "\n",
+ "\n",
" # Architecture\n",
" self.patch_len = patch_len\n",
" self.stride = stride\n",
@@ -523,13 +533,10 @@
" return lags\n",
" \n",
" def forward(self, windows_batch):\n",
- " insample_y = windows_batch['insample_y']\n",
- "\n",
- " x = insample_y.unsqueeze(-1)\n",
+ " x = windows_batch['insample_y']\n",
"\n",
" y_pred = self.forecast(x)\n",
" y_pred = y_pred[:, -self.h:, :]\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
" \n",
" return y_pred\n"
]
@@ -575,11 +582,12 @@
"outputs": [],
"source": [
"#| eval: false\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TimeLLM\n",
- "from neuralforecast.utils import AirPassengersPanel, augment_calendar_df\n",
- "\n",
- "AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
+ "from neuralforecast.utils import AirPassengersPanel\n",
"\n",
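"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",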
diff --git a/nbs/models.timemixer.ipynb b/nbs/models.timemixer.ipynb
index 9bfdd9cc5..49801a6b8 100644
--- a/nbs/models.timemixer.ipynb
+++ b/nbs/models.timemixer.ipynb
@@ -17,8 +17,8 @@
"\n",
"Seasonal and trend components exhibit significantly different characteristics in time series, and different scales of the time series reflect different properties, with seasonal characteristics being more pronounced at a fine-grained micro scale and trend characteristics being more pronounced at a coarse macro scale, it is therefore necessary to decouple seasonal and trend components at different scales. As such, TimeMixer is an MLP-based architecture with Past-Decomposable-Mixing (PDM) and Future-Multipredictor-Mixing (FMM) blocks to take full advantage of disentangled multiscale series in both past extraction and future prediction phases.\n",
"\n",
- "**Reference**\n",
- "- [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)"
+ "**References**<br>\n",
+ "[Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)<br>"
]
},
{
@@ -41,10 +41,10 @@
"import torch\n",
"import torch.nn as nn\n",
"\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import PositionalEmbedding, TokenEmbedding, TemporalEmbedding, SeriesDecomp, RevIN\n",
- "\n",
- "from neuralforecast.losses.pytorch import MAE"
+ "from neuralforecast.losses.pytorch import MAE\n",
+ "from typing import Optional"
]
},
{
@@ -54,8 +54,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -324,7 +327,7 @@
"source": [
"#| export\n",
"\n",
- "class TimeMixer(BaseMultivariate):\n",
+ "class TimeMixer(BaseModel):\n",
" \"\"\" TimeMixer\n",
" **Parameters**<br>\n",
" `h`: int, Forecast horizon.<br>\n",
@@ -354,6 +357,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.<br>\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.<br>\n",
" `batch_size`: int=32, number of different series in each batch.<br>\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.<br>\n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, default uses all.<br>\n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.<br>\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.<br>\n",
" `step_size`: int=1, step size between each window of temporal data.<br>\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).<br>\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.<br>\n",
@@ -368,14 +375,15 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).<br>\n",
"\n",
" **References**<br>\n",
- " [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)\n",
+ " [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)<br>\n",
" \"\"\"\n",
"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -405,6 +413,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -431,6 +443,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -522,6 +538,9 @@
" for i in range(self.down_sampling_layers + 1)\n",
" ]\n",
" )\n",
+ " \n",
+ " if self.loss.outputsize_multiplier > 1:\n",
+ " self.distr_output = nn.Linear(self.n_series, self.n_series * self.loss.outputsize_multiplier)\n",
"\n",
" def out_projection(self, dec_out, i, out_res):\n",
" dec_out = self.projection_layer(dec_out)\n",
@@ -678,13 +697,10 @@
"\n",
" y_pred = self.forecast(insample_y, x_mark_enc, x_mark_dec)\n",
" y_pred = y_pred[:, -self.h:, :]\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
+ " if self.loss.outputsize_multiplier > 1:\n",
+ " y_pred = self.distr_output(y_pred)\n",
"\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " if y_pred.ndim == 2:\n",
- " return y_pred.unsqueeze(-1)\n",
- " else:\n",
- " return y_pred"
+ " return y_pred\n"
]
},
{
@@ -714,6 +730,21 @@
"show_doc(TimeMixer.predict, name='TimeMixer.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TimeMixer, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
diff --git a/nbs/models.timesnet.ipynb b/nbs/models.timesnet.ipynb
index 37e5d46e4..00d65688f 100644
--- a/nbs/models.timesnet.ipynb
+++ b/nbs/models.timesnet.ipynb
@@ -54,7 +54,7 @@
"import torch.fft\n",
"\n",
"from neuralforecast.common._modules import DataEmbedding\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -66,8 +66,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -200,7 +203,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TimesNet(BaseWindows):\n",
+ "class TimesNet(BaseModel):\n",
" \"\"\" TimesNet\n",
"\n",
" The TimesNet univariate model tackles the challenge of modeling multiple intraperiod and interperiod temporal variations.\n",
@@ -279,10 +282,11 @@
" Haixu Wu and Tengge Hu and Yong Liu and Hang Zhou and Jianmin Wang and Mingsheng Long. TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis. https://openreview.net/pdf?id=ju_Uqw384Oq\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False \n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -377,13 +381,9 @@
"\n",
" # Parse windows_batch\n",
" insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
" futr_exog = windows_batch['futr_exog']\n",
"\n",
" # Parse inputs\n",
- " insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
" if self.futr_exog_size > 0:\n",
" x_mark_enc = futr_exog[:,:self.input_size,:]\n",
" else:\n",
@@ -398,7 +398,7 @@
" # porject back\n",
" dec_out = self.projection(enc_out)\n",
"\n",
- " forecast = self.loss.domain_map(dec_out[:, -self.h:])\n",
+ " forecast = dec_out[:, -self.h:]\n",
" return forecast"
]
},
@@ -429,6 +429,21 @@
"show_doc(TimesNet.predict, name='TimesNet.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TimesNet, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -448,9 +463,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.losses.pytorch import DistributionLoss\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, augment_calendar_df\n",
- "\n",
- "AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
+ "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
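"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]] # 132 train\n",
"Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",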
@@ -460,10 +473,9 @@
" hidden_size = 16,\n",
" conv_hidden_size = 32,\n",
" loss=DistributionLoss(distribution='Normal', level=[80, 90]),\n",
- " futr_exog_list=calendar_cols,\n",
" scaler_type='standard',\n",
" learning_rate=1e-3,\n",
- " max_steps=5,\n",
+ " max_steps=100,\n",
" val_check_steps=50,\n",
" early_stop_patience_steps=2)\n",
"\n",
diff --git a/nbs/models.tsmixer.ipynb b/nbs/models.tsmixer.ipynb
index 94a9e4125..4c01a42f3 100644
--- a/nbs/models.tsmixer.ipynb
+++ b/nbs/models.tsmixer.ipynb
@@ -44,8 +44,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -55,12 +58,13 @@
"outputs": [],
"source": [
"#| export\n",
- "import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate"
+ "from neuralforecast.common._base_model import BaseModel\n",
+ "from neuralforecast.common._modules import RevINMultivariate"
]
},
{
@@ -157,55 +161,6 @@
" return x"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Reversible InstanceNormalization\n",
- "An Instance Normalization Layer that is reversible, based on [this reference implementation](https://github.com/google-research/google-research/blob/master/tsmixer/tsmixer_basic/models/rev_in.py).<br>"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class ReversibleInstanceNorm1d(nn.Module):\n",
- " \"\"\" \n",
- " ReversibleInstanceNorm1d\n",
- " \"\"\" \n",
- " def __init__(self, n_series, eps=1e-5):\n",
- " super().__init__()\n",
- " self.weight = nn.Parameter(torch.ones((1, 1, n_series)))\n",
- " self.bias = nn.Parameter(torch.zeros((1, 1, n_series)))\n",
- "\n",
- " self.eps = eps\n",
- "\n",
- " def forward(self, x):\n",
- " # Batch statistics\n",
- " self.batch_mean = torch.mean(x, axis=1, keepdim=True).detach()\n",
- " self.batch_std = torch.sqrt(torch.var(x, axis=1, keepdim=True, unbiased=False) + self.eps).detach()\n",
- " \n",
- " # Instance normalization\n",
- " x = x - self.batch_mean\n",
- " x = x / self.batch_std\n",
- " x = x * self.weight\n",
- " x = x + self.bias\n",
- " \n",
- " return x\n",
- "\n",
- " def reverse(self, x):\n",
- " # Reverse the normalization\n",
- " x = x - self.bias\n",
- " x = x / self.weight \n",
- " x = x * self.batch_std\n",
- " x = x + self.batch_mean \n",
- "\n",
- " return x"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -220,7 +175,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TSMixer(BaseMultivariate):\n",
+ "class TSMixer(BaseModel):\n",
" \"\"\" TSMixer\n",
"\n",
" Time-Series Mixer (`TSMixer`) is a MLP-based multivariate time-series forecasting model. `TSMixer` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).\n",
@@ -244,6 +199,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n",
" `batch_size`: int=32, number of different series in each batch.
\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, if None uses all windows.
\n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.
\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
@@ -262,10 +221,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -274,6 +234,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" n_block = 2,\n",
" ff_dim = 64,\n",
" dropout = 0.9,\n",
@@ -286,6 +247,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -305,6 +270,7 @@
" futr_exog_list=futr_exog_list,\n",
" hist_exog_list=hist_exog_list,\n",
" stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -313,6 +279,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -328,7 +298,7 @@
" # Reversible InstanceNormalization layer\n",
" self.revin = revin\n",
" if self.revin:\n",
- " self.norm = ReversibleInstanceNorm1d(n_series = n_series)\n",
+ " self.norm = RevINMultivariate(num_features = n_series, affine=True)\n",
"\n",
" # Mixing layers\n",
" mixing_layers = [MixingLayer(n_series=n_series, \n",
@@ -349,23 +319,17 @@
"\n",
" # TSMixer: InstanceNorm + Mixing layers + Dense output layer + ReverseInstanceNorm\n",
" if self.revin:\n",
- " x = self.norm(x)\n",
+ " x = self.norm(x, 'norm')\n",
" x = self.mixing_layers(x)\n",
" x = x.permute(0, 2, 1)\n",
" x = self.out(x)\n",
" x = x.permute(0, 2, 1)\n",
" if self.revin:\n",
- " x = self.norm.reverse(x)\n",
+ " x = self.norm(x, 'denorm')\n",
"\n",
" x = x.reshape(batch_size, self.h, self.loss.outputsize_multiplier * self.n_series)\n",
- " forecast = self.loss.domain_map(x)\n",
- "\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.\n",
- " if forecast.ndim == 2:\n",
- " return forecast.unsqueeze(-1)\n",
- " else:\n",
- " return forecast"
+ "\n",
+ " return x"
]
},
{
@@ -401,80 +365,12 @@
"metadata": {},
"outputs": [],
"source": [
- "#| hide\n",
- "import logging\n",
- "import warnings\n",
- "\n",
- "from neuralforecast import NeuralForecast\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE, MSE, RMSE, MAPE, SMAPE, MASE, relMSE, QuantileLoss, MQLoss, DistributionLoss,PMM, GMM, NBMM, HuberLoss, TukeyLoss, HuberQLoss, HuberMQLoss"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# Test losses\n",
+ "# Unit tests for models\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "\n",
- "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
- "\n",
- "AirPassengersStatic_single = AirPassengersStatic[AirPassengersStatic[\"unique_id\"] == 'Airline1']\n",
- "Y_train_df_single = Y_train_df[Y_train_df[\"unique_id\"] == 'Airline1']\n",
- "Y_test_df_single = Y_test_df[Y_test_df[\"unique_id\"] == 'Airline1']\n",
- "\n",
- "losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "valid_losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "\n",
- "for loss, valid_loss in zip(losses, valid_losses):\n",
- " try:\n",
- " model = TSMixer(h=12,\n",
- " input_size=24,\n",
- " n_series=2,\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " batch_size=32\n",
- " )\n",
- "\n",
- " fcst = NeuralForecast(models=[model], freq='M')\n",
- " fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
- " forecasts = fcst.predict(futr_df=Y_test_df)\n",
- " except Exception as e:\n",
- " assert str(e) == f\"{loss} is not supported in a Multivariate model.\"\n",
- "\n",
- "\n",
- "# Test n_series = 1\n",
- "model = TSMixer(h=12,\n",
- " input_size=24,\n",
- " n_series=1,\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " batch_size=32\n",
- " )\n",
- "fcst = NeuralForecast(models=[model], freq='M')\n",
- "fcst.fit(df=Y_train_df_single, static_df=AirPassengersStatic_single, val_size=12)\n",
- "forecasts = fcst.predict(futr_df=Y_test_df_single)"
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TSMixer, [\"airpassengers\"])"
]
},
{
@@ -504,7 +400,7 @@
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TSMixer\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE\n",
+ "from neuralforecast.losses.pytorch import MAE, MQLoss\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
@@ -521,8 +417,7 @@
" early_stop_patience_steps=-1,\n",
" val_check_steps=5,\n",
" learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
+ " loss=MQLoss(),\n",
" batch_size=32\n",
" )\n",
"\n",
@@ -536,9 +431,13 @@
"plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n",
"plot_df = pd.concat([Y_train_df, plot_df])\n",
"\n",
- "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n",
+ "plot_df = plot_df[plot_df.unique_id=='Airline2'].drop('unique_id', axis=1)\n",
"plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n",
- "plt.plot(plot_df['ds'], plot_df['TSMixer'], c='blue', label='Forecast')\n",
+ "plt.plot(plot_df['ds'], plot_df['TSMixer-median'], c='blue', label='median')\n",
+ "plt.fill_between(x=plot_df['ds'][-12:], \n",
+ " y1=plot_df['TSMixer-lo-90'][-12:].values,\n",
+ " y2=plot_df['TSMixer-hi-90'][-12:].values,\n",
+ " alpha=0.4, label='level 90')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
@@ -569,7 +468,7 @@
"Y_df = AirPassengersPanel[AirPassengersPanel['unique_id']=='Airline1']\n",
"\n",
"plt.plot(Y_df['ds'], Y_df['y'], c='black', label='True')\n",
- "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixer'], c='blue', label='Forecast')\n",
+ "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixer-median'], c='blue', label='Forecast')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
diff --git a/nbs/models.tsmixerx.ipynb b/nbs/models.tsmixerx.ipynb
index cb0ba72b6..691bdbc32 100644
--- a/nbs/models.tsmixerx.ipynb
+++ b/nbs/models.tsmixerx.ipynb
@@ -44,8 +44,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -59,8 +62,10 @@
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate"
+ "from neuralforecast.common._base_model import BaseModel\n",
+ "from neuralforecast.common._modules import RevINMultivariate"
]
},
{
@@ -244,7 +249,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TSMixerx(BaseMultivariate):\n",
+ "class TSMixerx(BaseModel):\n",
" \"\"\" TSMixerx\n",
"\n",
" Time-Series Mixer exogenous (`TSMixerx`) is a MLP-based multivariate time-series forecasting model, with capability for additional exogenous inputs. `TSMixerx` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).\n",
@@ -268,6 +273,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check.
\n",
" `batch_size`: int=32, number of different series in each batch.
\n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
\n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, if None uses all windows.
\n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.
\n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
\n",
" `step_size`: int=1, step size between each window of temporal data.
\n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n",
@@ -286,10 +295,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -298,6 +308,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" n_block = 2,\n",
" ff_dim = 64,\n",
" dropout = 0.0,\n",
@@ -310,6 +321,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -329,6 +344,7 @@
" futr_exog_list=futr_exog_list,\n",
" hist_exog_list=hist_exog_list,\n",
" stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -337,6 +353,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -351,7 +371,7 @@
" # Reversible InstanceNormalization layer\n",
" self.revin = revin\n",
" if self.revin:\n",
- " self.norm = ReversibleInstanceNorm1d(n_series = n_series)\n",
+ " self.norm = RevINMultivariate(num_features= n_series, affine=True)\n",
"\n",
" # Forecast horizon\n",
" self.h = h\n",
@@ -417,19 +437,19 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse batch\n",
- " x = windows_batch['insample_y'] # [batch_size (B), input_size (L), n_series (N)]\n",
- " hist_exog = windows_batch['hist_exog'] # [B, hist_exog_size (X), L, N]\n",
- " futr_exog = windows_batch['futr_exog'] # [B, futr_exog_size (F), L + h, N]\n",
- " stat_exog = windows_batch['stat_exog'] # [N, stat_exog_size (S)]\n",
+ " x = windows_batch['insample_y'] # [batch_size (B), input_size (L), n_series (N)]\n",
+ " hist_exog = windows_batch['hist_exog'] # [B, hist_exog_size (X), L, N]\n",
+ " futr_exog = windows_batch['futr_exog'] # [B, futr_exog_size (F), L + h, N]\n",
+ " stat_exog = windows_batch['stat_exog'] # [N, stat_exog_size (S)]\n",
" batch_size, input_size = x.shape[:2]\n",
"\n",
+ " # Apply revin to x\n",
+ " if self.revin:\n",
+ " x = self.norm(x, mode=\"norm\") # [B, L, N] -> [B, L, N]\n",
+ "\n",
" # Add channel dimension to x\n",
" x = x.unsqueeze(1) # [B, L, N] -> [B, 1, L, N]\n",
"\n",
- " # Apply revin to x\n",
- " if self.revin:\n",
- " x = self.norm(x) # [B, 1, L, N] -> [B, 1, L, N]\n",
- " \n",
" # Concatenate x with historical exogenous\n",
" if self.hist_exog_size > 0:\n",
" x = torch.cat((x, hist_exog), dim=1) # [B, 1, L, N] + [B, X, L, N] -> [B, 1 + X, L, N]\n",
@@ -476,26 +496,17 @@
" x = self.mixing_block(x) # [B, h, ff_dim] -> [B, h, ff_dim] \n",
" \n",
" # Fully connected output layer\n",
- " x = self.out(x) # [B, h, ff_dim] -> [B, h, N * n_outputs]\n",
+ " forecast = self.out(x) # [B, h, ff_dim] -> [B, h, N * n_outputs]\n",
" \n",
" # Reverse Instance Normalization on output\n",
" if self.revin:\n",
- " x = x.reshape(batch_size, \n",
- " self.h, \n",
- " self.loss.outputsize_multiplier,\n",
- " -1) # [B, h, N * n_outputs] -> [B, h, n_outputs, N]\n",
- " x = self.norm.reverse(x)\n",
- " x = x.reshape(batch_size, self.h, -1) # [B, h, n_outputs, N] -> [B, h, n_outputs * N]\n",
- "\n",
- " # Map to loss domain\n",
- " forecast = self.loss.domain_map(x)\n",
- "\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.\n",
- " if forecast.ndim == 2:\n",
- " return forecast.unsqueeze(-1)\n",
- " else:\n",
- " return forecast"
+ " forecast = forecast.reshape(batch_size, \n",
+ " self.h * self.loss.outputsize_multiplier,\n",
+ " -1) # [B, h, N * n_outputs] -> [B, h * n_outputs, N]\n",
+ " forecast = self.norm(forecast, \"denorm\")\n",
+ " forecast = forecast.reshape(batch_size, self.h, -1) # [B, h * n_outputs, N] -> [B, h, n_outputs * N]\n",
+ "\n",
+ " return forecast"
]
},
{
@@ -531,113 +542,12 @@
"metadata": {},
"outputs": [],
"source": [
- "#| hide\n",
- "import logging\n",
- "import warnings\n",
- "import pandas as pd\n",
- "\n",
- "from neuralforecast import NeuralForecast\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, generate_series\n",
- "from neuralforecast.losses.pytorch import MAE, MSE, RMSE, MAPE, SMAPE, MASE, relMSE, QuantileLoss, MQLoss, DistributionLoss,PMM, GMM, NBMM, HuberLoss, TukeyLoss, HuberQLoss, HuberMQLoss\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# Test losses\n",
+ "# Unit tests for models\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "\n",
- "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
- "\n",
- "AirPassengersStatic_single = AirPassengersStatic[AirPassengersStatic[\"unique_id\"] == 'Airline1']\n",
- "Y_train_df_single = Y_train_df[Y_train_df[\"unique_id\"] == 'Airline1']\n",
- "Y_test_df_single = Y_test_df[Y_test_df[\"unique_id\"] == 'Airline1']\n",
- "\n",
- "losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "valid_losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "\n",
- "for loss, valid_loss in zip(losses, valid_losses):\n",
- " try:\n",
- " model = TSMixerx(h=12,\n",
- " input_size=24,\n",
- " n_series=2,\n",
- " stat_exog_list=['airline1'],\n",
- " futr_exog_list=['trend'],\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " batch_size=32\n",
- " )\n",
- "\n",
- " fcst = NeuralForecast(models=[model], freq='M')\n",
- " fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
- " forecasts = fcst.predict(futr_df=Y_test_df)\n",
- " except Exception as e:\n",
- " assert str(e) == f\"{loss} is not supported in a Multivariate model.\"\n",
- "\n",
- "\n",
- "# Test n_series = 1\n",
- "model = TSMixerx(h=12,\n",
- " input_size=24,\n",
- " n_series=1,\n",
- " stat_exog_list=['airline1'],\n",
- " futr_exog_list=['trend'],\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " batch_size=32\n",
- " )\n",
- "fcst = NeuralForecast(models=[model], freq='M')\n",
- "fcst.fit(df=Y_train_df_single, static_df=AirPassengersStatic_single, val_size=12)\n",
- "forecasts = fcst.predict(futr_df=Y_test_df_single) \n",
- "\n",
- "# Test n_series > 1024\n",
- "# See issue: https://github.com/Nixtla/neuralforecast/issues/948\n",
- "n_series = 1111\n",
- "Y_df, S_df = generate_series(n_series=n_series, n_temporal_features=2, n_static_features=2)\n",
- "\n",
- "model = TSMixerx(\n",
- " h=12,\n",
- " input_size=24,\n",
- " n_series=n_series,\n",
- " stat_exog_list=['static_0', 'static_1'],\n",
- " hist_exog_list=[\"temporal_0\", \"temporal_1\"],\n",
- " n_block=4,\n",
- " ff_dim=3,\n",
- " revin=True,\n",
- " scaler_type=\"standard\",\n",
- " max_steps=5,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " batch_size=32,\n",
- ")\n",
- "\n",
- "fcst = NeuralForecast(models=[model], freq=\"D\")\n",
- "fcst.fit(df=Y_df, static_df=S_df, val_size=12)\n",
- "forecasts = fcst.predict()"
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TSMixerx, [\"airpassengers\"])"
]
},
{
@@ -667,7 +577,7 @@
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TSMixerx\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE\n",
+ "from neuralforecast.losses.pytorch import GMM\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
@@ -680,13 +590,12 @@
" n_block=4,\n",
" ff_dim=4,\n",
" revin=True,\n",
- " scaler_type='standard',\n",
+ " scaler_type='robust',\n",
" max_steps=500,\n",
" early_stop_patience_steps=-1,\n",
" val_check_steps=5,\n",
" learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
+ " loss = GMM(n_components=10, weighted=True),\n",
" batch_size=32\n",
" )\n",
"\n",
@@ -702,7 +611,11 @@
"\n",
"plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n",
"plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n",
- "plt.plot(plot_df['ds'], plot_df['TSMixerx'], c='blue', label='Forecast')\n",
+ "plt.plot(plot_df['ds'], plot_df['TSMixerx-median'], c='blue', label='median')\n",
+ "plt.fill_between(x=plot_df['ds'][-12:], \n",
+ " y1=plot_df['TSMixerx-lo-90'][-12:].values,\n",
+ " y2=plot_df['TSMixerx-hi-90'][-12:].values,\n",
+ " alpha=0.4, label='level 90')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
@@ -733,7 +646,7 @@
"Y_df = AirPassengersPanel[AirPassengersPanel['unique_id']=='Airline1']\n",
"\n",
"plt.plot(Y_df['ds'], Y_df['y'], c='black', label='True')\n",
- "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixerx'], c='blue', label='Forecast')\n",
+ "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixerx-median'], c='blue', label='Forecast')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
diff --git a/nbs/models.vanillatransformer.ipynb b/nbs/models.vanillatransformer.ipynb
index b76cc9ba2..c28b2a4a6 100644
--- a/nbs/models.vanillatransformer.ipynb
+++ b/nbs/models.vanillatransformer.ipynb
@@ -67,7 +67,7 @@
" TransDecoderLayer, TransDecoder,\n",
" DataEmbedding, AttentionLayer,\n",
")\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -79,8 +79,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -154,7 +157,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class VanillaTransformer(BaseWindows):\n",
+ "class VanillaTransformer(BaseModel):\n",
" \"\"\" VanillaTransformer\n",
"\n",
" Vanilla Transformer, following implementation of the Informer paper, used as baseline.\n",
@@ -209,10 +212,11 @@
"\t- [Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, Wancai Zhang. \"Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting\"](https://arxiv.org/abs/2012.07436)
\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -346,14 +350,8 @@
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
" insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
- "\n",
" futr_exog = windows_batch['futr_exog']\n",
"\n",
- " insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
- "\n",
" if self.futr_exog_size > 0:\n",
" x_mark_enc = futr_exog[:,:self.input_size,:]\n",
" x_mark_dec = futr_exog[:,-(self.label_len+self.h):,:]\n",
@@ -371,7 +369,7 @@
" dec_out = self.decoder(dec_out, enc_out, x_mask=None, \n",
" cross_mask=None)\n",
"\n",
- " forecast = self.loss.domain_map(dec_out[:, -self.h:])\n",
+ " forecast = dec_out[:, -self.h:]\n",
" return forecast"
]
},
@@ -402,6 +400,21 @@
"show_doc(VanillaTransformer.predict, name='VanillaTransformer.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(VanillaTransformer, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -421,9 +434,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import VanillaTransformer\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, augment_calendar_df\n",
- "\n",
- "AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
+ "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
@@ -434,7 +445,6 @@
" conv_hidden_size=32,\n",
" n_head=2,\n",
" loss=MAE(),\n",
- " futr_exog_list=calendar_cols,\n",
" scaler_type='robust',\n",
" learning_rate=1e-3,\n",
" max_steps=500,\n",
diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb
index 5b056c144..e8cb8c170 100644
--- a/nbs/utils.ipynb
+++ b/nbs/utils.ipynb
@@ -13,7 +13,16 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The autoreload extension is already loaded. To reload it, use:\n",
+ " %reload_ext autoreload\n"
+ ]
+ }
+ ],
"source": [
"#| hide\n",
"%load_ext autoreload\n",
@@ -38,12 +47,11 @@
"#| export\n",
"import random\n",
"from itertools import chain\n",
- "from typing import List, Union\n",
+ "from typing import List, Union, Optional, Tuple\n",
"from utilsforecast.compat import DFType\n",
"\n",
"import numpy as np\n",
- "import pandas as pd\n",
- "import utilsforecast.processing as ufp"
+ "import pandas as pd"
]
},
{
@@ -161,7 +169,77 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/markdown": [
+ "---\n",
+ "\n",
+ "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/utils.py#L22){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+ "\n",
+ "### generate_series\n",
+ "\n",
+ "> generate_series (n_series:int, freq:str='D', min_length:int=50,\n",
+ "> max_length:int=500, n_temporal_features:int=0,\n",
+ "> n_static_features:int=0, equal_ends:bool=False,\n",
+ "> seed:int=0)\n",
+ "\n",
+ "*Generate Synthetic Panel Series.\n",
+ "\n",
+ "Generates `n_series` of frequency `freq` of different lengths in the interval [`min_length`, `max_length`].\n",
+ "If `n_temporal_features > 0`, then each series gets temporal features with random values.\n",
+ "If `n_static_features > 0`, then a static dataframe is returned along the temporal dataframe.\n",
+ "If `equal_ends == True` then all series end at the same date.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`n_series`: int, number of series for synthetic panel.
\n",
+ "`min_length`: int, minimal length of synthetic panel's series.
\n",
+ "`max_length`: int, maximal length of synthetic panel's series.
\n",
+ "`n_temporal_features`: int, default=0, number of temporal exogenous variables for synthetic panel's series.
\n",
+ "`n_static_features`: int, default=0, number of static exogenous variables for synthetic panel's series.
\n",
+ "`equal_ends`: bool, if True, series finish in the same date stamp `ds`.
\n",
+ "`freq`: str, frequency of the data, [panda's available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).
\n",
+ "\n",
+ "**Returns:**
\n",
+ "`freq`: pandas.DataFrame, synthetic panel with columns [`unique_id`, `ds`, `y`] and exogenous.*"
+ ],
+ "text/plain": [
+ "---\n",
+ "\n",
+ "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/utils.py#L22){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n",
+ "\n",
+ "### generate_series\n",
+ "\n",
+ "> generate_series (n_series:int, freq:str='D', min_length:int=50,\n",
+ "> max_length:int=500, n_temporal_features:int=0,\n",
+ "> n_static_features:int=0, equal_ends:bool=False,\n",
+ "> seed:int=0)\n",
+ "\n",
+ "*Generate Synthetic Panel Series.\n",
+ "\n",
+ "Generates `n_series` of frequency `freq` of different lengths in the interval [`min_length`, `max_length`].\n",
+ "If `n_temporal_features > 0`, then each series gets temporal features with random values.\n",
+ "If `n_static_features > 0`, then a static dataframe is returned along the temporal dataframe.\n",
+ "If `equal_ends == True` then all series end at the same date.\n",
+ "\n",
+ "**Parameters:**
\n",
+ "`n_series`: int, number of series for synthetic panel.
\n",
+ "`min_length`: int, minimal length of synthetic panel's series.
\n",
+ "`max_length`: int, maximal length of synthetic panel's series.
\n",
+ "`n_temporal_features`: int, default=0, number of temporal exogenous variables for synthetic panel's series.
\n",
+ "`n_static_features`: int, default=0, number of static exogenous variables for synthetic panel's series.
\n",
+ "`equal_ends`: bool, if True, series finish in the same date stamp `ds`.
\n",
+ "`freq`: str, frequency of the data, [panda's available frequencies](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases).
\n",
+ "\n",
+ "**Returns:**
\n",
+ "`freq`: pandas.DataFrame, synthetic panel with columns [`unique_id`, `ds`, `y`] and exogenous.*"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"show_doc(generate_series, title_level=3)"
]
@@ -170,7 +248,111 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\ospra\\AppData\\Local\\Temp\\ipykernel_16560\\470716697.py:2: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n",
+ " synthetic_panel.groupby('unique_id').head(4)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unique_id | \n",
+ " ds | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2000-01-01 | \n",
+ " 0.357595 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 2000-01-02 | \n",
+ " 1.301382 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 2000-01-03 | \n",
+ " 2.272442 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 2000-01-04 | \n",
+ " 3.211827 | \n",
+ "
\n",
+ " \n",
+ " 222 | \n",
+ " 1 | \n",
+ " 2000-01-01 | \n",
+ " 5.399023 | \n",
+ "
\n",
+ " \n",
+ " 223 | \n",
+ " 1 | \n",
+ " 2000-01-02 | \n",
+ " 6.092818 | \n",
+ "
\n",
+ " \n",
+ " 224 | \n",
+ " 1 | \n",
+ " 2000-01-03 | \n",
+ " 0.476396 | \n",
+ "
\n",
+ " \n",
+ " 225 | \n",
+ " 1 | \n",
+ " 2000-01-04 | \n",
+ " 1.343744 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unique_id ds y\n",
+ "0 0 2000-01-01 0.357595\n",
+ "1 0 2000-01-02 1.301382\n",
+ "2 0 2000-01-03 2.272442\n",
+ "3 0 2000-01-04 3.211827\n",
+ "222 1 2000-01-01 5.399023\n",
+ "223 1 2000-01-02 6.092818\n",
+ "224 1 2000-01-03 0.476396\n",
+ "225 1 2000-01-04 1.343744"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"synthetic_panel = generate_series(n_series=2)\n",
"synthetic_panel.groupby('unique_id').head(4)"
@@ -180,7 +362,61 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " static_0 | \n",
+ " static_1 | \n",
+ " unique_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.748805 | \n",
+ " 0.573544 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.234966 | \n",
+ " 0.235057 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " static_0 static_1 unique_id\n",
+ "0 0.748805 0.573544 0\n",
+ "1 0.234966 0.235057 1"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"temporal_df, static_df = generate_series(n_series=1000, n_static_features=2,\n",
" n_temporal_features=4, equal_ends=False)\n",
@@ -238,7 +474,131 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unique_id | \n",
+ " ds | \n",
+ " y | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1.0 | \n",
+ " 1949-01-31 | \n",
+ " 112.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1.0 | \n",
+ " 1949-02-28 | \n",
+ " 118.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1.0 | \n",
+ " 1949-03-31 | \n",
+ " 132.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1.0 | \n",
+ " 1949-04-30 | \n",
+ " 129.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1.0 | \n",
+ " 1949-05-31 | \n",
+ " 121.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1.0 | \n",
+ " 1949-06-30 | \n",
+ " 135.0 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1.0 | \n",
+ " 1949-07-31 | \n",
+ " 148.0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1.0 | \n",
+ " 1949-08-31 | \n",
+ " 148.0 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1.0 | \n",
+ " 1949-09-30 | \n",
+ " 136.0 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1.0 | \n",
+ " 1949-10-31 | \n",
+ " 119.0 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 1.0 | \n",
+ " 1949-11-30 | \n",
+ " 104.0 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 1.0 | \n",
+ " 1949-12-31 | \n",
+ " 118.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unique_id ds y\n",
+ "0 1.0 1949-01-31 112.0\n",
+ "1 1.0 1949-02-28 118.0\n",
+ "2 1.0 1949-03-31 132.0\n",
+ "3 1.0 1949-04-30 129.0\n",
+ "4 1.0 1949-05-31 121.0\n",
+ "5 1.0 1949-06-30 135.0\n",
+ "6 1.0 1949-07-31 148.0\n",
+ "7 1.0 1949-08-31 148.0\n",
+ "8 1.0 1949-09-30 136.0\n",
+ "9 1.0 1949-10-31 119.0\n",
+ "10 1.0 1949-11-30 104.0\n",
+ "11 1.0 1949-12-31 118.0"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"AirPassengersDF.head(12)"
]
@@ -247,7 +607,18 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"#We are going to plot the ARIMA predictions, and the prediction intervals.\n",
"fig, ax = plt.subplots(1, 1, figsize = (20, 7))\n",
@@ -291,7 +662,88 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " static_0 | \n",
+ " static_1 | \n",
+ " static_2 | \n",
+ " unique_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.268844 | \n",
+ " 0.875946 | \n",
+ " 0.047605 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.995151 | \n",
+ " 0.376025 | \n",
+ " 0.497579 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.136613 | \n",
+ " 0.060934 | \n",
+ " 0.319290 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.084419 | \n",
+ " 0.918999 | \n",
+ " 0.820050 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.774360 | \n",
+ " 0.685072 | \n",
+ " 0.113191 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " static_0 static_1 static_2 unique_id\n",
+ "0 0.268844 0.875946 0.047605 0\n",
+ "1 0.995151 0.376025 0.497579 1\n",
+ "2 0.136613 0.060934 0.319290 2\n",
+ "3 0.084419 0.918999 0.820050 3\n",
+ "4 0.774360 0.685072 0.113191 4"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"static_df"
]
@@ -311,7 +763,121 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unique_id | \n",
+ " ds | \n",
+ " y | \n",
+ " trend | \n",
+ " y_[lag12] | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 140 | \n",
+ " Airline1 | \n",
+ " 1960-09-30 | \n",
+ " 508.0 | \n",
+ " 140 | \n",
+ " 463.0 | \n",
+ "
\n",
+ " \n",
+ " 141 | \n",
+ " Airline1 | \n",
+ " 1960-10-31 | \n",
+ " 461.0 | \n",
+ " 141 | \n",
+ " 407.0 | \n",
+ "
\n",
+ " \n",
+ " 142 | \n",
+ " Airline1 | \n",
+ " 1960-11-30 | \n",
+ " 390.0 | \n",
+ " 142 | \n",
+ " 362.0 | \n",
+ "
\n",
+ " \n",
+ " 143 | \n",
+ " Airline1 | \n",
+ " 1960-12-31 | \n",
+ " 432.0 | \n",
+ " 143 | \n",
+ " 405.0 | \n",
+ "
\n",
+ " \n",
+ " 284 | \n",
+ " Airline2 | \n",
+ " 1960-09-30 | \n",
+ " 808.0 | \n",
+ " 284 | \n",
+ " 763.0 | \n",
+ "
\n",
+ " \n",
+ " 285 | \n",
+ " Airline2 | \n",
+ " 1960-10-31 | \n",
+ " 761.0 | \n",
+ " 285 | \n",
+ " 707.0 | \n",
+ "
\n",
+ " \n",
+ " 286 | \n",
+ " Airline2 | \n",
+ " 1960-11-30 | \n",
+ " 690.0 | \n",
+ " 286 | \n",
+ " 662.0 | \n",
+ "
\n",
+ " \n",
+ " 287 | \n",
+ " Airline2 | \n",
+ " 1960-12-31 | \n",
+ " 732.0 | \n",
+ " 287 | \n",
+ " 705.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unique_id ds y trend y_[lag12]\n",
+ "140 Airline1 1960-09-30 508.0 140 463.0\n",
+ "141 Airline1 1960-10-31 461.0 141 407.0\n",
+ "142 Airline1 1960-11-30 390.0 142 362.0\n",
+ "143 Airline1 1960-12-31 432.0 143 405.0\n",
+ "284 Airline2 1960-09-30 808.0 284 763.0\n",
+ "285 Airline2 1960-10-31 761.0 285 707.0\n",
+ "286 Airline2 1960-11-30 690.0 286 662.0\n",
+ "287 Airline2 1960-12-31 732.0 287 705.0"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"#| export\n",
"\n",
@@ -348,7 +914,18 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"fig, ax = plt.subplots(1, 1, figsize = (20, 7))\n",
"plot_df = AirPassengersPanel.set_index('ds')\n",
@@ -365,7 +942,18 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"fig, ax = plt.subplots(1, 1, figsize = (20, 7))\n",
"plot_df = AirPassengersPanel[AirPassengersPanel.unique_id=='Airline1'].set_index('ds')\n",
@@ -522,7 +1110,100 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " unique_id | \n",
+ " ds | \n",
+ " y | \n",
+ " trend | \n",
+ " y_[lag12] | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Airline1 | \n",
+ " 1949-01-31 | \n",
+ " 112.0 | \n",
+ " 0 | \n",
+ " 112.0 | \n",
+ " -0.500000 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Airline1 | \n",
+ " 1949-02-28 | \n",
+ " 118.0 | \n",
+ " 1 | \n",
+ " 118.0 | \n",
+ " -0.409091 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Airline1 | \n",
+ " 1949-03-31 | \n",
+ " 132.0 | \n",
+ " 2 | \n",
+ " 132.0 | \n",
+ " -0.318182 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Airline1 | \n",
+ " 1949-04-30 | \n",
+ " 129.0 | \n",
+ " 3 | \n",
+ " 129.0 | \n",
+ " -0.227273 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Airline1 | \n",
+ " 1949-05-31 | \n",
+ " 121.0 | \n",
+ " 4 | \n",
+ " 121.0 | \n",
+ " -0.136364 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " unique_id ds y trend y_[lag12] month\n",
+ "0 Airline1 1949-01-31 112.0 0 112.0 -0.500000\n",
+ "1 Airline1 1949-02-28 118.0 1 118.0 -0.409091\n",
+ "2 Airline1 1949-03-31 132.0 2 132.0 -0.318182\n",
+ "3 Airline1 1949-04-30 129.0 3 129.0 -0.227273\n",
+ "4 Airline1 1949-05-31 121.0 4 121.0 -0.136364"
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"AirPassengerPanelCalendar, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
"AirPassengerPanelCalendar.head()"
@@ -532,7 +1213,18 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "