diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index e8623f86c..2e81b7ca7 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -19,7 +19,7 @@ jobs: python-version: 3.8 - name: Install dependencies - run: pip install black nbdev pre-commit + run: pip install black nbdev==2.3.25 pre-commit - name: Run pre-commit run: pre-commit run --files neuralforecast/* diff --git a/nbs/models.timemixer.ipynb b/nbs/models.timemixer.ipynb index f06b214a4..87ed045a5 100644 --- a/nbs/models.timemixer.ipynb +++ b/nbs/models.timemixer.ipynb @@ -36,8 +36,6 @@ "source": [ "#| export\n", "\n", - "import numpy as np\n", - "\n", "import torch\n", "import torch.nn as nn\n", "\n", @@ -457,7 +455,7 @@ "\n", " # Class attributes\n", " SAMPLING_TYPE = 'multivariate'\n", - " EXOGENOUS_FUTR = False\n", + " EXOGENOUS_FUTR = True\n", " EXOGENOUS_HIST = False\n", " EXOGENOUS_STAT = False\n", "\n", @@ -480,7 +478,6 @@ " down_sampling_window: int = 2,\n", " down_sampling_method: str = 'avg',\n", " use_norm: bool = True,\n", - " decoder_input_size_multiplier: float = 0.5,\n", " loss = MAE(),\n", " valid_loss = None,\n", " max_steps: int = 1000,\n", @@ -525,10 +522,6 @@ " lr_scheduler_kwargs=lr_scheduler_kwargs,\n", " **trainer_kwargs)\n", " \n", - " self.label_len = int(np.ceil(input_size * decoder_input_size_multiplier))\n", - " if (self.label_len >= input_size) or (self.label_len <= 0):\n", - " raise Exception(f'Check decoder_input_size_multiplier={decoder_input_size_multiplier}, range (0,1)')\n", - " \n", " self.h = h\n", " self.input_size = input_size\n", " self.e_layers = e_layers\n", @@ -674,9 +667,9 @@ " if self.channel_independence == 1:\n", " B, T, N = x_enc.size()\n", " x_mark_dec = x_mark_dec.repeat(N, 1, 1)\n", - " self.x_mark_dec = self.enc_embedding(None, x_mark_dec)\n", + " self.x_mark_dec = self.enc_embedding(None, x_mark_dec) \n", " else:\n", - " self.x_mark_dec = self.enc_embedding(None, x_mark_dec)\n", + " self.x_mark_dec = self.enc_embedding(x_mark_enc, x_mark_dec)\n", "\n", " x_enc, x_mark_enc = self.__multi_scale_process_inputs(x_enc, x_mark_enc)\n", "\n", @@ -751,8 +744,8 @@ " futr_exog = windows_batch['futr_exog']\n", "\n", " if self.futr_exog_size > 0:\n", - " x_mark_enc = futr_exog[:, :, :self.input_size, :]\n", - " x_mark_dec = futr_exog[:, :, -(self.label_len + self.h):, :]\n", + " x_mark_enc = futr_exog[:, :, :self.input_size]\n", + " x_mark_dec = None\n", " else:\n", " x_mark_enc = None\n", " x_mark_dec = None\n", @@ -773,7 +766,147 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/timemixer.py#L327){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### TimeMixer\n", + "\n", + "> TimeMixer (h, input_size, n_series, stat_exog_list=None,\n", + "> hist_exog_list=None, futr_exog_list=None, d_model:int=32,\n", + "> d_ff:int=32, dropout:float=0.1, e_layers:int=4, top_k:int=5,\n", + "> decomp_method:str='moving_avg', moving_avg:int=25,\n", + "> channel_independence:int=0, down_sampling_layers:int=1,\n", + "> down_sampling_window:int=2, down_sampling_method:str='avg',\n", + "> use_norm:bool=True, loss=MAE(), valid_loss=None,\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=-1, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32, step_size:int=1,\n", + "> scaler_type:str='identity', 
random_seed:int=1,\n", + "> num_workers_loader:int=0, drop_last_loader:bool=False,\n", + "> optimizer=None, optimizer_kwargs=None, lr_scheduler=None,\n", + "> lr_scheduler_kwargs=None, **trainer_kwargs)\n", + "\n", + "TimeMixer\n", + "**Parameters**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`n_series`: int, number of time-series.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`d_model`: int, dimension of the model.
\n", + "`d_ff`: int, dimension of the fully-connected network.
\n", + "`dropout`: float, dropout rate.
\n", + "`e_layers`: int, number of encoder layers.
\n", + "`top_k`: int, number of selected frequencies.
\n", + "`decomp_method`: str, method of series decomposition [moving_avg, dft_decomp].
\n", + "`moving_avg`: int, window size of moving average.
\n", + "`channel_independence`: int, 0: channel dependence, 1: channel independence.
\n", + "`down_sampling_layers`: int, number of downsampling layers.
\n", + "`down_sampling_window`: int, size of downsampling window.
\n", + "`down_sampling_method`: str, down sampling method [avg, max, conv].
\n", + "`use_norm`: bool, whether to normalize or not.
\n", + " `decoder_input_size_multiplier`: float = 0.5.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`lr_scheduler`: Subclass of 'torch.optim.lr_scheduler.LRScheduler', optional, user specified lr_scheduler instead of the default choice (StepLR).
\n", + "`lr_scheduler_kwargs`: dict, optional, list of parameters used by the user specified `lr_scheduler`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "[Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)" + ], + "text/plain": [ + "---\n", + "\n", + "[source](https://github.com/Nixtla/neuralforecast/blob/main/neuralforecast/models/timemixer.py#L327){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "\n", + "### TimeMixer\n", + "\n", + "> TimeMixer (h, input_size, n_series, stat_exog_list=None,\n", + "> hist_exog_list=None, futr_exog_list=None, d_model:int=32,\n", + "> d_ff:int=32, dropout:float=0.1, e_layers:int=4, top_k:int=5,\n", + "> decomp_method:str='moving_avg', moving_avg:int=25,\n", + "> channel_independence:int=0, down_sampling_layers:int=1,\n", + "> down_sampling_window:int=2, down_sampling_method:str='avg',\n", + "> use_norm:bool=True, loss=MAE(), valid_loss=None,\n", + "> max_steps:int=1000, learning_rate:float=0.001,\n", + "> num_lr_decays:int=-1, early_stop_patience_steps:int=-1,\n", + "> val_check_steps:int=100, batch_size:int=32, step_size:int=1,\n", + "> scaler_type:str='identity', random_seed:int=1,\n", + "> num_workers_loader:int=0, drop_last_loader:bool=False,\n", + "> optimizer=None, optimizer_kwargs=None, lr_scheduler=None,\n", + "> lr_scheduler_kwargs=None, **trainer_kwargs)\n", + "\n", + "TimeMixer\n", + "**Parameters**
\n", + "`h`: int, Forecast horizon.
\n", + "`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
\n", + "`n_series`: int, number of time-series.
\n", + "`futr_exog_list`: str list, future exogenous columns.
\n", + "`hist_exog_list`: str list, historic exogenous columns.
\n", + "`stat_exog_list`: str list, static exogenous columns.
\n", + "`d_model`: int, dimension of the model.
\n", + "`d_ff`: int, dimension of the fully-connected network.
\n", + "`dropout`: float, dropout rate.
\n", + "`e_layers`: int, number of encoder layers.
\n", + "`top_k`: int, number of selected frequencies.
\n", + "`decomp_method`: str, method of series decomposition [moving_avg, dft_decomp].
\n", + "`moving_avg`: int, window size of moving average.
\n", + "`channel_independence`: int, 0: channel dependence, 1: channel independence.
\n", + "`down_sampling_layers`: int, number of downsampling layers.
\n", + "`down_sampling_window`: int, size of downsampling window.
\n", + "`down_sampling_method`: str, down sampling method [avg, max, conv].
\n", + "`use_norm`: bool, whether to normalize or not.
\n", + " `decoder_input_size_multiplier`: float = 0.5.
\n", + "`loss`: PyTorch module, instantiated train loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`valid_loss`: PyTorch module=`loss`, instantiated valid loss class from [losses collection](https://nixtla.github.io/neuralforecast/losses.pytorch.html).
\n", + "`max_steps`: int=1000, maximum number of training steps.
\n", + "`learning_rate`: float=1e-3, Learning rate between (0, 1).
\n", + "`num_lr_decays`: int=-1, Number of learning rate decays, evenly distributed across max_steps.
\n", + "`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
\n", + "`val_check_steps`: int=100, Number of training steps between every validation loss check.
\n", + "`batch_size`: int=32, number of different series in each batch.
\n", + "`step_size`: int=1, step size between each window of temporal data.
\n", + "`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
\n", + "`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
\n", + "`num_workers_loader`: int=os.cpu_count(), workers to be used by `TimeSeriesDataLoader`.
\n", + "`drop_last_loader`: bool=False, if True `TimeSeriesDataLoader` drops last non-full batch.
\n", + "`alias`: str, optional, Custom name of the model.
\n", + "`optimizer`: Subclass of 'torch.optim.Optimizer', optional, user specified optimizer instead of the default choice (Adam).
\n", + "`optimizer_kwargs`: dict, optional, list of parameters used by the user specified `optimizer`.
\n", + "`lr_scheduler`: Subclass of 'torch.optim.lr_scheduler.LRScheduler', optional, user specified lr_scheduler instead of the default choice (StepLR).
\n", + "`lr_scheduler_kwargs`: dict, optional, list of parameters used by the user specified `lr_scheduler`.
\n", + "`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
\n", + "\n", + "**References**
\n", + "[Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(TimeMixer)" ] @@ -782,7 +915,71 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### TimeMixer.fit\n", + "\n", + "> TimeMixer.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ], + "text/plain": [ + "---\n", + "\n", + "### TimeMixer.fit\n", + "\n", + "> TimeMixer.fit (dataset, val_size=0, test_size=0, random_seed=None,\n", + "> distributed_config=None)\n", + "\n", + "Fit.\n", + "\n", + "The `fit` method, optimizes the neural network's weights using the\n", + "initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n", + "and the `loss` function as defined during the initialization.\n", + "Within `fit` we use a PyTorch Lightning `Trainer` that\n", + "inherits the initialization's `self.trainer_kwargs`, to customize\n", + "its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n", + "\n", + "The method is designed to be compatible with SKLearn-like classes\n", + "and in particular to be compatible with the StatsForecast library.\n", + "\n", + "By default the `model` is not saving training checkpoints to protect\n", + "disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`val_size`: int, validation size for temporal cross-validation.
\n", + "`test_size`: int, test size for temporal cross-validation.
" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(TimeMixer.fit, name='TimeMixer.fit')" ] @@ -791,7 +988,51 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### TimeMixer.predict\n", + "\n", + "> TimeMixer.predict (dataset, test_size=None, step_size=1,\n", + "> random_seed=None, **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ], + "text/plain": [ + "---\n", + "\n", + "### TimeMixer.predict\n", + "\n", + "> TimeMixer.predict (dataset, test_size=None, step_size=1,\n", + "> random_seed=None, **data_module_kwargs)\n", + "\n", + "Predict.\n", + "\n", + "Neural network prediction with PL's `Trainer` execution of `predict_step`.\n", + "\n", + "**Parameters:**
\n", + "`dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
\n", + "`test_size`: int=None, test size for temporal cross-validation.
\n", + "`step_size`: int=1, Step size between each window.
\n", + "`**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule)." + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "show_doc(TimeMixer.predict, name='TimeMixer.predict')" ] @@ -816,14 +1057,395 @@ "from neuralforecast import NeuralForecast\n", "from neuralforecast.models import TimeMixer\n", "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n", - "from neuralforecast.losses.pytorch import MAE, DistributionLoss" + "from neuralforecast.losses.pytorch import MAE, MSE" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Seed set to 1\n", + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params\n", + "------------------------------------------------------------\n", + "0 | loss | MAE | 0 \n", + "1 | valid_loss | MAE | 0 \n", + "2 | padder | ConstantPad1d | 0 \n", + "3 | scaler | TemporalNorm | 0 \n", + "4 | pdm_blocks | ModuleList | 22.6 K\n", + "5 | preprocess | SeriesDecomp | 0 \n", + "6 | enc_embedding | DataEmbedding_wo_pos | 2.6 K \n", + "7 | normalize_layers | ModuleList | 8 \n", + "8 | predict_layers | ModuleList | 456 \n", + "9 | projection_layer | Linear | 66 \n", + "10 | out_res_layers | ModuleList | 756 \n", + "11 | regression_layers | ModuleList | 456 \n", + "------------------------------------------------------------\n", + "24.6 K Trainable params\n", + "2.4 K Non-trainable params\n", + "27.0 K Total params\n", + "0.108 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "07f36aaaf2b74582b7416cfa60316011", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? 
[00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "#| eval: false\n", "# Plot predictions\n", @@ -881,7 +1514,379 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: True (mps), used: True\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n", + "\n", + " | Name | Type | Params\n", + "------------------------------------------------------------\n", + "0 | loss | MAE | 0 \n", + "1 | valid_loss | MAE | 0 \n", + "2 | padder | ConstantPad1d | 0 \n", + "3 | scaler | TemporalNorm | 0 \n", + "4 | pdm_blocks | ModuleList | 22.6 K\n", + "5 | preprocess | SeriesDecomp | 0 \n", + "6 | enc_embedding | DataEmbedding_wo_pos | 2.6 K \n", + "7 | normalize_layers | ModuleList | 8 \n", + "8 | predict_layers | ModuleList | 456 \n", + "9 | projection_layer | Linear | 66 \n", + "10 | out_res_layers | ModuleList | 756 \n", + "11 | regression_layers | ModuleList | 456 \n", + "------------------------------------------------------------\n", + "24.6 K Trainable params\n", + "2.4 K Non-trainable params\n", + "27.0 K Total params\n", + "0.108 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "90ed6397e68147b489f26568e03f39aa", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? [00:00 16\u001b[0m \u001b[43mfcst\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mY_train_df\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstatic_df\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mAirPassengersStatic\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m12\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 17\u001b[0m forecasts \u001b[38;5;241m=\u001b[39m fcst\u001b[38;5;241m.\u001b[39mpredict(futr_df\u001b[38;5;241m=\u001b[39mY_test_df)\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/core.py:543\u001b[0m, in \u001b[0;36mNeuralForecast.fit\u001b[0;34m(self, df, static_df, val_size, sort_df, use_init_models, verbose, id_col, time_col, target_col, distributed_config)\u001b[0m\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reset_models()\n\u001b[1;32m 542\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, model \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodels):\n\u001b[0;32m--> 543\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodels[i] \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 544\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mval_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdistributed_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdistributed_config\u001b[49m\n\u001b[1;32m 545\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 547\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_fitted \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/common/_base_multivariate.py:547\u001b[0m, in \u001b[0;36mBaseMultivariate.fit\u001b[0;34m(self, dataset, val_size, test_size, random_seed, distributed_config)\u001b[0m\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m distributed_config \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 544\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 545\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmultivariate models cannot be trained using distributed data parallel.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 546\u001b[0m )\n\u001b[0;32m--> 547\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 548\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 549\u001b[0m \u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_series\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 550\u001b[0m \u001b[43m \u001b[49m\u001b[43mvalid_batch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_series\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 551\u001b[0m \u001b[43m \u001b[49m\u001b[43mval_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mval_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[43m \u001b[49m\u001b[43mtest_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 553\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_seed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_seed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[43m \u001b[49m\u001b[43mshuffle_train\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 555\u001b[0m \u001b[43m \u001b[49m\u001b[43mdistributed_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 556\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/common/_base_model.py:356\u001b[0m, in \u001b[0;36mBaseModel._fit\u001b[0;34m(self, dataset, batch_size, valid_batch_size, val_size, test_size, random_seed, shuffle_train, distributed_config)\u001b[0m\n\u001b[1;32m 354\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\n\u001b[1;32m 355\u001b[0m trainer \u001b[38;5;241m=\u001b[39m pl\u001b[38;5;241m.\u001b[39mTrainer(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel\u001b[38;5;241m.\u001b[39mtrainer_kwargs)\n\u001b[0;32m--> 356\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatamodule\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdatamodule\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
357\u001b[0m model\u001b[38;5;241m.\u001b[39mmetrics \u001b[38;5;241m=\u001b[39m trainer\u001b[38;5;241m.\u001b[39mcallback_metrics\n\u001b[1;32m 358\u001b[0m model\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_trainer\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:544\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 542\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m=\u001b[39m TrainerStatus\u001b[38;5;241m.\u001b[39mRUNNING\n\u001b[1;32m 543\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 544\u001b[0m \u001b[43mcall\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_and_handle_interrupt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 545\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit_impl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mval_dataloaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatamodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\n\u001b[1;32m 546\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:44\u001b[0m, in \u001b[0;36m_call_and_handle_interrupt\u001b[0;34m(trainer, trainer_fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mlauncher \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39mlauncher\u001b[38;5;241m.\u001b[39mlaunch(trainer_fn, \u001b[38;5;241m*\u001b[39margs, trainer\u001b[38;5;241m=\u001b[39mtrainer, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m---> 44\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtrainer_fn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m _TunerExitException:\n\u001b[1;32m 47\u001b[0m _call_teardown_hook(trainer)\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:580\u001b[0m, in \u001b[0;36mTrainer._fit_impl\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 573\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfn \u001b[38;5;129;01mis\u001b[39;00m 
\u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 574\u001b[0m ckpt_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_checkpoint_connector\u001b[38;5;241m.\u001b[39m_select_ckpt_path(\n\u001b[1;32m 575\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mfn,\n\u001b[1;32m 576\u001b[0m ckpt_path,\n\u001b[1;32m 577\u001b[0m model_provided\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 578\u001b[0m model_connected\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlightning_module \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 579\u001b[0m )\n\u001b[0;32m--> 580\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mckpt_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mckpt_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 582\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mstopped\n\u001b[1;32m 583\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:989\u001b[0m, in \u001b[0;36mTrainer._run\u001b[0;34m(self, model, ckpt_path)\u001b[0m\n\u001b[1;32m 984\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_signal_connector\u001b[38;5;241m.\u001b[39mregister_signal_handlers()\n\u001b[1;32m 986\u001b[0m \u001b[38;5;66;03m# ----------------------------\u001b[39;00m\n\u001b[1;32m 987\u001b[0m \u001b[38;5;66;03m# RUN THE TRAINER\u001b[39;00m\n\u001b[1;32m 988\u001b[0m \u001b[38;5;66;03m# ----------------------------\u001b[39;00m\n\u001b[0;32m--> 989\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_stage\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 991\u001b[0m \u001b[38;5;66;03m# ----------------------------\u001b[39;00m\n\u001b[1;32m 992\u001b[0m \u001b[38;5;66;03m# POST-Training CLEAN UP\u001b[39;00m\n\u001b[1;32m 993\u001b[0m \u001b[38;5;66;03m# ----------------------------\u001b[39;00m\n\u001b[1;32m 994\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: trainer tearing down\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1033\u001b[0m, in \u001b[0;36mTrainer._run_stage\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1031\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining:\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m isolate_rng():\n\u001b[0;32m-> 1033\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_sanity_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1034\u001b[0m 
\u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mautograd\u001b[38;5;241m.\u001b[39mset_detect_anomaly(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_detect_anomaly):\n\u001b[1;32m 1035\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit_loop\u001b[38;5;241m.\u001b[39mrun()\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py:1062\u001b[0m, in \u001b[0;36mTrainer._run_sanity_check\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1059\u001b[0m call\u001b[38;5;241m.\u001b[39m_call_callback_hooks(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon_sanity_check_start\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1061\u001b[0m \u001b[38;5;66;03m# run eval step\u001b[39;00m\n\u001b[0;32m-> 1062\u001b[0m \u001b[43mval_loop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1064\u001b[0m call\u001b[38;5;241m.\u001b[39m_call_callback_hooks(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon_sanity_check_end\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1066\u001b[0m \u001b[38;5;66;03m# reset logger connector\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/loops/utilities.py:182\u001b[0m, in \u001b[0;36m_no_grad_context.._decorator\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 180\u001b[0m context_manager \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mno_grad\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m context_manager():\n\u001b[0;32m--> 182\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mloop_run\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py:134\u001b[0m, in \u001b[0;36m_EvaluationLoop.run\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbatch_progress\u001b[38;5;241m.\u001b[39mis_last_batch \u001b[38;5;241m=\u001b[39m data_fetcher\u001b[38;5;241m.\u001b[39mdone\n\u001b[1;32m 133\u001b[0m \u001b[38;5;66;03m# run step hooks\u001b[39;00m\n\u001b[0;32m--> 134\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_evaluation_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdataloader_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdataloader_iter\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 135\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m:\n\u001b[1;32m 136\u001b[0m \u001b[38;5;66;03m# this needs to wrap the `*_step` call too (not just `next`) for `dataloader_iter` support\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/loops/evaluation_loop.py:391\u001b[0m, in \u001b[0;36m_EvaluationLoop._evaluation_step\u001b[0;34m(self, batch, 
batch_idx, dataloader_idx, dataloader_iter)\u001b[0m\n\u001b[1;32m 385\u001b[0m hook_name \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_step\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mtesting \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalidation_step\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 386\u001b[0m step_args \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 387\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_build_step_args_from_hook_kwargs(hook_kwargs, hook_name)\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m using_dataloader_iter\n\u001b[1;32m 389\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m (dataloader_iter,)\n\u001b[1;32m 390\u001b[0m )\n\u001b[0;32m--> 391\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mcall\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_strategy_hook\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrainer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhook_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mstep_args\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbatch_progress\u001b[38;5;241m.\u001b[39mincrement_processed()\n\u001b[1;32m 395\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m using_dataloader_iter:\n\u001b[1;32m 396\u001b[0m \u001b[38;5;66;03m# update the hook kwargs now that the step method might have consumed the iterator\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py:309\u001b[0m, in \u001b[0;36m_call_strategy_hook\u001b[0;34m(trainer, hook_name, *args, **kwargs)\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m trainer\u001b[38;5;241m.\u001b[39mprofiler\u001b[38;5;241m.\u001b[39mprofile(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[Strategy]\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtrainer\u001b[38;5;241m.\u001b[39mstrategy\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mhook_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m--> 309\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;66;03m# restore current_fx when nested context\u001b[39;00m\n\u001b[1;32m 312\u001b[0m pl_module\u001b[38;5;241m.\u001b[39m_current_fx_name \u001b[38;5;241m=\u001b[39m prev_fx_name\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py:403\u001b[0m, in \u001b[0;36mStrategy.validation_step\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 401\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m!=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlightning_module:\n\u001b[1;32m 402\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_redirection(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlightning_module, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvalidation_step\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 403\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlightning_module\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidation_step\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/common/_base_multivariate.py:429\u001b[0m, in \u001b[0;36mBaseMultivariate.validation_step\u001b[0;34m(self, batch, batch_idx)\u001b[0m\n\u001b[1;32m 420\u001b[0m windows_batch \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\n\u001b[1;32m 421\u001b[0m insample_y\u001b[38;5;241m=\u001b[39minsample_y, \u001b[38;5;66;03m# [Ws, L, n_series]\u001b[39;00m\n\u001b[1;32m 422\u001b[0m insample_mask\u001b[38;5;241m=\u001b[39minsample_mask, \u001b[38;5;66;03m# [Ws, L, n_series]\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 425\u001b[0m stat_exog\u001b[38;5;241m=\u001b[39mstat_exog,\n\u001b[1;32m 426\u001b[0m ) \u001b[38;5;66;03m# [n_series, S]\u001b[39;00m\n\u001b[1;32m 428\u001b[0m \u001b[38;5;66;03m# Model Predictions\u001b[39;00m\n\u001b[0;32m--> 429\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mwindows_batch\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 430\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloss\u001b[38;5;241m.\u001b[39mis_distribution_output:\n\u001b[1;32m 431\u001b[0m outsample_y, y_loc, y_scale \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inv_normalization(\n\u001b[1;32m 432\u001b[0m y_hat\u001b[38;5;241m=\u001b[39moutsample_y, temporal_cols\u001b[38;5;241m=\u001b[39mbatch[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtemporal_cols\u001b[39m\u001b[38;5;124m\"\u001b[39m], y_idx\u001b[38;5;241m=\u001b[39my_idx\n\u001b[1;32m 433\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/models/timemixer.py:715\u001b[0m, in \u001b[0;36mTimeMixer.forward\u001b[0;34m(self, windows_batch)\u001b[0m\n\u001b[1;32m 712\u001b[0m x_mark_enc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 713\u001b[0m x_mark_dec \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 715\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforecast\u001b[49m\u001b[43m(\u001b[49m\u001b[43minsample_y\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx_mark_enc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx_mark_dec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 716\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m y_pred[:, \u001b[38;5;241m-\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mh :, :]\n\u001b[1;32m 717\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloss\u001b[38;5;241m.\u001b[39mdomain_map(y_pred)\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/models/timemixer.py:623\u001b[0m, in \u001b[0;36mTimeMixer.forecast\u001b[0;34m(self, x_enc, x_mark_enc, x_mark_dec)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mx_mark_dec \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menc_embedding(\u001b[38;5;28;01mNone\u001b[39;00m, x_mark_dec)\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 623\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mx_mark_dec \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43menc_embedding\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mx_mark_dec\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 625\u001b[0m x_enc, x_mark_enc \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__multi_scale_process_inputs(x_enc, x_mark_enc)\n\u001b[1;32m 627\u001b[0m x_list \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m 
\u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/models/timemixer.py:120\u001b[0m, in \u001b[0;36mDataEmbedding_wo_pos.forward\u001b[0;34m(self, x, x_mark)\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtemporal_embedding(x_mark)\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m x_mark \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 120\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalue_embedding\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 122\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalue_embedding(x) \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtemporal_embedding(x_mark)\n", + "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m 
\u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n", "File \u001b[0;32m~/miniconda3/envs/neuralforecast/lib/python3.10/site-packages/neuralforecast/common/_modules.py:388\u001b[0m, in \u001b[0;36mTokenEmbedding.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 387\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m--> 388\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtokenConv(\u001b[43mx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpermute\u001b[49m(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m1\u001b[39m))\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m 389\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m x\n", "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'permute'" ] } ], "source": [ "#| eval: false\n", "\n", "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds<AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 132 train\n", "Y_test_df = AirPassengersPanel[AirPassengersPanel.ds>=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n", "\n", "model = TimeMixer(h=12,\n", " input_size=24,\n", " n_series=2,\n", " futr_exog_list=['trend'],\n", " loss=MSE(),\n", " valid_loss=MAE(),\n", " early_stop_patience_steps=3,\n", " batch_size=32)\n", "\n", "fcst = NeuralForecast(models=[model], freq='M')\n", "fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n", "forecasts = fcst.predict(futr_df=Y_test_df)" ] } ], diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py index 4b4d5a080..783a82704 100644 --- a/neuralforecast/_modidx.py +++ b/neuralforecast/_modidx.py @@ -500,7 +500,60 @@ 'neuralforecast/losses/pytorch.py'), 'neuralforecast.losses.pytorch.weighted_average': ( 'losses.pytorch.html#weighted_average', 'neuralforecast/losses/pytorch.py')}, - 'neuralforecast.models.autoformer': {}, + 'neuralforecast.models.autoformer': { 'neuralforecast.models.autoformer.AutoCorrelation': ( 'models.autoformer.html#autocorrelation', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelation.__init__': ( 'models.autoformer.html#autocorrelation.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelation.forward': ( 'models.autoformer.html#autocorrelation.forward', +
'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelation.time_delay_agg_full': ( 'models.autoformer.html#autocorrelation.time_delay_agg_full', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelation.time_delay_agg_inference': ( 'models.autoformer.html#autocorrelation.time_delay_agg_inference', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelation.time_delay_agg_training': ( 'models.autoformer.html#autocorrelation.time_delay_agg_training', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelationLayer': ( 'models.autoformer.html#autocorrelationlayer', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelationLayer.__init__': ( 'models.autoformer.html#autocorrelationlayer.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.AutoCorrelationLayer.forward': ( 'models.autoformer.html#autocorrelationlayer.forward', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Autoformer': ( 'models.autoformer.html#autoformer', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Autoformer.__init__': ( 'models.autoformer.html#autoformer.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Autoformer.forward': ( 'models.autoformer.html#autoformer.forward', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Decoder': ( 'models.autoformer.html#decoder', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Decoder.__init__': ( 'models.autoformer.html#decoder.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Decoder.forward': ( 'models.autoformer.html#decoder.forward', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.DecoderLayer': ( 'models.autoformer.html#decoderlayer', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.DecoderLayer.__init__': ( 'models.autoformer.html#decoderlayer.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.DecoderLayer.forward': ( 'models.autoformer.html#decoderlayer.forward', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Encoder': ( 'models.autoformer.html#encoder', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Encoder.__init__': ( 'models.autoformer.html#encoder.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.Encoder.forward': ( 'models.autoformer.html#encoder.forward', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.EncoderLayer': ( 'models.autoformer.html#encoderlayer', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.EncoderLayer.__init__': ( 'models.autoformer.html#encoderlayer.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.EncoderLayer.forward': ( 'models.autoformer.html#encoderlayer.forward', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.LayerNorm': ( 'models.autoformer.html#layernorm', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.LayerNorm.__init__': ( 'models.autoformer.html#layernorm.__init__', + 'neuralforecast/models/autoformer.py'), + 'neuralforecast.models.autoformer.LayerNorm.forward': ( 
'models.autoformer.html#layernorm.forward', + 'neuralforecast/models/autoformer.py')}, 'neuralforecast.models.bitcn': { 'neuralforecast.models.bitcn.BiTCN': ( 'models.bitcn.html#bitcn', 'neuralforecast/models/bitcn.py'), 'neuralforecast.models.bitcn.BiTCN.__init__': ( 'models.bitcn.html#bitcn.__init__', diff --git a/neuralforecast/models/timemixer.py b/neuralforecast/models/timemixer.py index 755fdfd0d..ed9615a90 100644 --- a/neuralforecast/models/timemixer.py +++ b/neuralforecast/models/timemixer.py @@ -5,8 +5,6 @@ 'PastDecomposableMixing', 'TimeMixer'] # %% ../../nbs/models.timemixer.ipynb 3 -import numpy as np - import torch import torch.nn as nn @@ -374,7 +372,7 @@ class TimeMixer(BaseMultivariate): # Class attributes SAMPLING_TYPE = "multivariate" - EXOGENOUS_FUTR = False + EXOGENOUS_FUTR = True EXOGENOUS_HIST = False EXOGENOUS_STAT = False @@ -398,7 +396,6 @@ def __init__( down_sampling_window: int = 2, down_sampling_method: str = "avg", use_norm: bool = True, - decoder_input_size_multiplier: float = 0.5, loss=MAE(), valid_loss=None, max_steps: int = 1000, @@ -416,7 +413,7 @@ def __init__( optimizer_kwargs=None, lr_scheduler=None, lr_scheduler_kwargs=None, - **trainer_kwargs, + **trainer_kwargs ): super(TimeMixer, self).__init__( @@ -443,15 +440,9 @@ def __init__( optimizer_kwargs=optimizer_kwargs, lr_scheduler=lr_scheduler, lr_scheduler_kwargs=lr_scheduler_kwargs, - **trainer_kwargs, + **trainer_kwargs ) - self.label_len = int(np.ceil(input_size * decoder_input_size_multiplier)) - if (self.label_len >= input_size) or (self.label_len <= 0): - raise Exception( - f"Check decoder_input_size_multiplier={decoder_input_size_multiplier}, range (0,1)" - ) - self.h = h self.input_size = input_size self.e_layers = e_layers @@ -629,7 +620,7 @@ def forecast(self, x_enc, x_mark_enc, x_mark_dec): x_mark_dec = x_mark_dec.repeat(N, 1, 1) self.x_mark_dec = self.enc_embedding(None, x_mark_dec) else: - self.x_mark_dec = self.enc_embedding(None, x_mark_dec) + self.x_mark_dec = self.enc_embedding(x_mark_enc, x_mark_dec) x_enc, x_mark_enc = self.__multi_scale_process_inputs(x_enc, x_mark_enc) @@ -715,8 +706,8 @@ def forward(self, windows_batch): futr_exog = windows_batch["futr_exog"] if self.futr_exog_size > 0: - x_mark_enc = futr_exog[:, :, : self.input_size, :] - x_mark_dec = futr_exog[:, :, -(self.label_len + self.h) :, :] + x_mark_enc = futr_exog[:, :, : self.input_size] + x_mark_dec = None else: x_mark_enc = None x_mark_dec = None
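
A minimal sketch for exercising the newly enabled future-exogenous path (`EXOGENOUS_FUTR = True`) end to end. It assumes only the public `NeuralForecast` API and the `trend` column that ships with `AirPassengersPanel`; the tiny `max_steps` value is there to keep the smoke test fast, not a recommended training setting:

```python
from neuralforecast import NeuralForecast
from neuralforecast.models import TimeMixer
from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic

# 'trend' is known into the future, so it can be passed as a future exogenous feature.
model = TimeMixer(h=12,
                  input_size=24,
                  n_series=2,
                  futr_exog_list=['trend'],  # exercises the EXOGENOUS_FUTR=True path
                  max_steps=10)              # tiny run, smoke test only

nf = NeuralForecast(models=[model], freq='M')
# cross_validation builds the future frame internally, so it hits both the
# x_mark_enc slicing in forward() and the embedding call in forecast().
cv_df = nf.cross_validation(df=AirPassengersPanel, static_df=AirPassengersStatic, n_windows=2)
print(cv_df.head())
```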