From 4561ef1d3c28ecbd234c53dba002c577bb8ab2b2 Mon Sep 17 00:00:00 2001 From: Olivier Sprangers Date: Tue, 8 Oct 2024 10:42:47 +0200 Subject: [PATCH] fix_issues --- nbs/core.ipynb | 167 +++------- .../tutorials/20_conformal_prediction.ipynb | 311 +++++++++++++----- nbs/utils.ipynb | 29 +- neuralforecast/_modidx.py | 18 +- neuralforecast/core.py | 73 ++-- neuralforecast/utils.py | 31 +- 6 files changed, 350 insertions(+), 279 deletions(-) diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 81688d3e7..a0d508c83 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -96,7 +96,7 @@ " TimeMixer, KAN, RMoK\n", ")\n", "from neuralforecast.common._base_auto import BaseAuto, MockTrial\n", - "from neuralforecast.utils import ConformalIntervals, get_conformal_method" + "from neuralforecast.utils import PredictionIntervals, get_prediction_interval_method" ] }, { @@ -507,7 +507,7 @@ " time_col: str = 'ds',\n", " target_col: str = 'y',\n", " distributed_config: Optional[DistributedConfig] = None,\n", - " conformal_intervals: Optional[ConformalIntervals] = None,\n", + " prediction_intervals: Optional[PredictionIntervals] = None,\n", " ) -> None:\n", " \"\"\"Fit the core.NeuralForecast.\n", "\n", @@ -537,7 +537,7 @@ " Column that contains the target.\n", " distributed_config : neuralforecast.DistributedConfig\n", " Configuration to use for DDP training. Currently only spark is supported.\n", - " conformal_intervals : ConformalIntervals, optional (default=None)\n", + " prediction_intervals : PredictionIntervals, optional (default=None)\n", " Configuration to calibrate prediction intervals (Conformal Prediction). 
\n", "\n", " Returns\n", @@ -556,7 +556,7 @@ " raise Exception('Set val_size>0 if early stopping is enabled.')\n", " \n", " self._cs_df: Optional[DFType] = None\n", - " self.conformal_intervals: Optional[ConformalIntervals] = None\n", + " self.prediction_intervals: Optional[PredictionIntervals] = None\n", "\n", " # Process and save new dataset (in self)\n", " if isinstance(df, (pd.DataFrame, pl_DataFrame)):\n", @@ -610,9 +610,8 @@ " if self.dataset.min_size < val_size:\n", " warnings.warn('Validation set size is larger than the shorter time-series.')\n", "\n", - " if conformal_intervals is not None:\n", - " # conformal prediction\n", - " self.conformal_intervals = conformal_intervals\n", + " if prediction_intervals is not None:\n", + " self.prediction_intervals = prediction_intervals\n", " self._cs_df = self._conformity_scores(\n", " df=df,\n", " id_col=id_col,\n", @@ -726,12 +725,11 @@ "\n", " return futr_exog | set(hist_exog)\n", " \n", - " def _get_model_names(self, conformal=False, enable_quantiles=False) -> List[str]:\n", + " def _get_model_names(self, add_level=False) -> List[str]:\n", " names: List[str] = []\n", " count_names = {'model': 0}\n", " for model in self.models:\n", - " if conformal and not enable_quantiles and model.loss.outputsize_multiplier > 1:\n", - " # skip prediction intervals on quantile outputs\n", + " if add_level and model.loss.outputsize_multiplier > 1:\n", " continue\n", "\n", " model_name = repr(model)\n", @@ -856,7 +854,7 @@ " sort_df: bool = True,\n", " verbose: bool = False,\n", " engine = None,\n", - " conformal_level: Optional[List[Union[int, float]]] = None,\n", + " level: Optional[List[Union[int, float]]] = None,\n", " **data_kwargs\n", " ):\n", " \"\"\"Predict with core.NeuralForecast.\n", @@ -878,8 +876,8 @@ " Print processing steps.\n", " engine : spark session\n", " Distributed engine for inference. 
Only used if df is a spark dataframe or if fit was called on a spark dataframe.\n", - " conformal_level : list of ints or floats, optional (default=None)\n", - " Confidence levels between 0 and 100 for conformal intervals.\n", + " level : list of ints or floats, optional (default=None)\n", + " Confidence levels between 0 and 100.\n", " data_kwargs : kwargs\n", " Extra arguments to be passed to the dataset within each model.\n", "\n", @@ -1015,24 +1013,21 @@ " _warn_id_as_idx()\n", " fcsts_df = fcsts_df.set_index(self.id_col)\n", "\n", - " # perform conformal predictions\n", - " if conformal_level is not None:\n", - " if self._cs_df is None or self.conformal_intervals is None:\n", - " warn_msg = (\n", - " 'Please rerun the `fit` method passing a valid conformal_interval settings to compute conformity scores'\n", - " )\n", - " warnings.warn(warn_msg, UserWarning)\n", + " # add prediction intervals\n", + " if level is not None:\n", + " if self._cs_df is None or self.prediction_intervals is None:\n", + " raise Exception('You must fit the model with prediction_intervals to use level.')\n", " else:\n", - " level_ = sorted(conformal_level)\n", - " model_names = self._get_model_names(conformal=True, enable_quantiles=self.conformal_intervals.enable_quantiles)\n", - " conformal_method = get_conformal_method(self.conformal_intervals.method)\n", + " level_ = sorted(level)\n", + " model_names = self._get_model_names(add_level=True)\n", + " prediction_interval_method = get_prediction_interval_method(self.prediction_intervals.method)\n", "\n", - " fcsts_df = conformal_method(\n", + " fcsts_df = prediction_interval_method(\n", " fcsts_df,\n", " self._cs_df,\n", " model_names=list(model_names),\n", " level=level_,\n", - " cs_n_windows=self.conformal_intervals.n_windows,\n", + " cs_n_windows=self.prediction_intervals.n_windows,\n", " n_series=len(uids),\n", " horizon=self.h,\n", " )\n", @@ -1522,8 +1517,7 @@ " \"id_col\": self.id_col,\n", " \"time_col\": self.time_col,\n", " 
\"target_col\": self.target_col,\n", - " # conformal prediction\n", - " \"conformal_intervals\": self.conformal_intervals,\n", + " \"prediction_intervals\": self.prediction_intervals,\n", " \"_cs_df\": self._cs_df, # conformity score\n", " }\n", " if save_dataset:\n", @@ -1613,7 +1607,7 @@ " for attr in ['id_col', 'time_col', 'target_col']:\n", " setattr(neuralforecast, attr, config_dict[attr])\n", " # only restore attribute if available\n", - " for attr in ['conformal_intervals', '_cs_df']:\n", + " for attr in ['prediction_intervals', '_cs_df']:\n", " if attr in config_dict.keys():\n", " setattr(neuralforecast, attr, config_dict[attr])\n", "\n", @@ -1647,9 +1641,9 @@ " \"\"\"Compute conformity scores.\n", " \n", " We need at least two cross validation errors to compute\n", - " quantiles for prediction intervals (`n_windows=2`, specified by self.conformal_intervals).\n", + " quantiles for prediction intervals (`n_windows=2`, specified by self.prediction_intervals).\n", " \n", - " The exception is raised by the ConformalIntervals data class.\n", + " The exception is raised by the PredictionIntervals data class.\n", "\n", " df: Optional[Union[DataFrame, SparkDataFrame, Sequence[str]]] = None,\n", " id_col: str = 'unique_id',\n", @@ -1657,11 +1651,11 @@ " target_col: str = 'y',\n", " static_df: Optional[Union[DataFrame, SparkDataFrame]] = None,\n", " \"\"\"\n", - " if self.conformal_intervals is None:\n", - " raise AttributeError('Please rerun the `fit` method passing a valid conformal_interval settings to compute conformity scores')\n", + " if self.prediction_intervals is None:\n", + " raise AttributeError('Please rerun the `fit` method passing a valid prediction_interval setting to compute conformity scores')\n", " \n", " min_size = ufp.counts_by_id(df, id_col)['counts'].min()\n", - " min_samples = self.h * self.conformal_intervals.n_windows + 1\n", + " min_samples = self.h * self.prediction_intervals.n_windows + 1\n", " if min_size < min_samples:\n", " raise 
ValueError(\n", " \"Minimum required samples in each serie for the prediction intervals \"\n", @@ -1672,7 +1666,7 @@ " cv_results = self.cross_validation(\n", " df=df,\n", " static_df=static_df,\n", - " n_windows=self.conformal_intervals.n_windows,\n", + " n_windows=self.prediction_intervals.n_windows,\n", " id_col=id_col,\n", " time_col=time_col,\n", " target_col=target_col,\n", @@ -1680,7 +1674,7 @@ " \n", " kept = [time_col, id_col, 'cutoff']\n", " # conformity score for each model\n", - " for model in self._get_model_names(conformal=True, enable_quantiles=self.conformal_intervals.enable_quantiles):\n", + " for model in self._get_model_names(add_level=True):\n", " kept.append(model)\n", "\n", " # compute absolute error for each model\n", @@ -2622,9 +2616,9 @@ " ],\n", " freq='M'\n", ")\n", - "conformal_intervals = ConformalIntervals()\n", - "fcst.fit(AirPassengersPanel_train, conformal_intervals=conformal_intervals)\n", - "forecasts1 = fcst.predict(futr_df=AirPassengersPanel_test, conformal_level=[50])\n", + "prediction_intervals = PredictionIntervals()\n", + "fcst.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)\n", + "forecasts1 = fcst.predict(futr_df=AirPassengersPanel_test, level=[50])\n", "save_paths = ['./examples/debug_run/']\n", "try:\n", " s3fs.S3FileSystem().ls('s3://nixtla-tmp') \n", @@ -2638,7 +2632,7 @@ "for path in save_paths:\n", " fcst.save(path=path, model_index=None, overwrite=True, save_dataset=True)\n", " fcst2 = NeuralForecast.load(path=path)\n", - " forecasts2 = fcst2.predict(futr_df=AirPassengersPanel_test, conformal_level=[50])\n", + " forecasts2 = fcst2.predict(futr_df=AirPassengersPanel_test, level=[50])\n", " pd.testing.assert_frame_equal(forecasts1, forecasts2[forecasts1.columns])" ] }, @@ -3332,19 +3326,19 @@ "#| hide\n", "# test conformal prediction, method=conformal_distribution\n", "\n", - "conformal_intervals = ConformalIntervals()\n", + "prediction_intervals = PredictionIntervals()\n", "\n", "models = 
[]\n", - "for nf_model in [NHITS, RNN, StemGNN]:\n", + "for nf_model in [NHITS, RNN, TSMixer]:\n", " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1}\n", - " if nf_model.__name__ == \"StemGNN\":\n", + " if nf_model.__name__ == \"TSMixer\":\n", " params.update({\"n_series\": 2})\n", " models.append(nf_model(**params))\n", "\n", "\n", "nf = NeuralForecast(models=models, freq='M')\n", - "nf.fit(AirPassengersPanel_train, conformal_intervals=conformal_intervals)\n", - "preds = nf.predict(futr_df=AirPassengersPanel_test, conformal_level=[10, 50, 90])" + "nf.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)\n", + "preds = nf.predict(futr_df=AirPassengersPanel_test, level=[90])" ] }, { @@ -3358,19 +3352,19 @@ "#| polars\n", "# test conformal prediction works for polar dataframe\n", "\n", - "conformal_intervals = ConformalIntervals()\n", + "prediction_intervals = PredictionIntervals()\n", "\n", "models = []\n", - "for nf_model in [NHITS, RNN, StemGNN]:\n", + "for nf_model in [NHITS, RNN, TSMixer]:\n", " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1}\n", - " if nf_model.__name__ == \"StemGNN\":\n", + " if nf_model.__name__ == \"TSMixer\":\n", " params.update({\"n_series\": 2})\n", " models.append(nf_model(**params))\n", "\n", "\n", "nf = NeuralForecast(models=models, freq='1mo')\n", - "nf.fit(AirPassengers_pl, conformal_intervals=conformal_intervals, time_col='time', id_col='uid', target_col='target')\n", - "preds = nf.predict(conformal_level=[10, 50, 90])" + "nf.fit(AirPassengers_pl, prediction_intervals=prediction_intervals, time_col='time', id_col='uid', target_col='target')\n", + "preds = nf.predict(level=[90])" ] }, { @@ -3383,84 +3377,19 @@ "#| hide\n", "# test conformal prediction, method=conformal_error\n", "\n", - "conformal_intervals = ConformalIntervals(method=\"conformal_error\")\n", + "prediction_intervals = PredictionIntervals(method=\"conformal_error\")\n", "\n", "models = []\n", - "for nf_model in [NHITS, RNN, 
StemGNN]:\n", + "for nf_model in [NHITS, RNN, TSMixer]:\n", " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1}\n", - " if nf_model.__name__ == \"StemGNN\":\n", + " if nf_model.__name__ == \"TSMixer\":\n", " params.update({\"n_series\": 2})\n", " models.append(nf_model(**params))\n", "\n", "\n", "nf = NeuralForecast(models=models, freq='M')\n", - "nf.fit(AirPassengersPanel_train, conformal_intervals=conformal_intervals)\n", - "preds = nf.predict(futr_df=AirPassengersPanel_test, conformal_level=[10, 50, 90])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d25b2cd2", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# test conformal prediction are not applied for models with quantiled-related loss\n", - "# by default (ConformalIntervals.enable_quantiles=False)\n", - "\n", - "conformal_intervals = ConformalIntervals()\n", - "\n", - "models = []\n", - "for nf_model in [NHITS, RNN]:\n", - " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1}\n", - " if nf_model.__name__ == \"NHITS\":\n", - " params.update({\"loss\": MQLoss(level=[80])})\n", - " models.append(nf_model(**params))\n", - "\n", - "\n", - "nf = NeuralForecast(models=models, freq='M')\n", - "nf.fit(AirPassengersPanel_train, conformal_intervals=conformal_intervals)\n", - "preds = nf.predict(futr_df=AirPassengersPanel_test, conformal_level=[10, 50, 90])\n", - "\n", - "pred_cols = [\n", - " 'NHITS-median', 'NHITS-lo-80', 'NHITS-hi-80', 'RNN',\n", - " 'RNN-conformal-lo-90', 'RNN-conformal-lo-50', 'RNN-conformal-lo-10',\n", - " 'RNN-conformal-hi-10', 'RNN-conformal-hi-50', 'RNN-conformal-hi-90'\n", - "]\n", - "assert all([col in preds.columns for col in pred_cols])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7b980087", - "metadata": {}, - "outputs": [], - "source": [ - "#| hide\n", - "# test conformal predictions applied to quantiles if ConformalIntervals.enable_quantiles=True\n", - "\n", - "conformal_intervals = 
ConformalIntervals(enable_quantiles=True)\n", - "\n", - "nf = NeuralForecast(models=[NHITS(h=12, input_size=24, max_steps=1, loss=MQLoss(level=[80]))], freq='M')\n", - "nf.fit(AirPassengersPanel_train, conformal_intervals=conformal_intervals)\n", - "preds = nf.predict(futr_df=AirPassengersPanel_test, conformal_level=[10, 50, 90])\n", - "\n", - "pred_cols = [\n", - " 'NHITS-median', 'NHITS-lo-80', 'NHITS-hi-80',\n", - " 'NHITS-median-conformal-lo-90', 'NHITS-median-conformal-lo-50',\n", - " 'NHITS-median-conformal-lo-10', 'NHITS-median-conformal-hi-10',\n", - " 'NHITS-median-conformal-hi-50', 'NHITS-median-conformal-hi-90',\n", - " 'NHITS-lo-80-conformal-lo-90', 'NHITS-lo-80-conformal-lo-50',\n", - " 'NHITS-lo-80-conformal-lo-10', 'NHITS-lo-80-conformal-hi-10',\n", - " 'NHITS-lo-80-conformal-hi-50', 'NHITS-lo-80-conformal-hi-90',\n", - " 'NHITS-hi-80-conformal-lo-90', 'NHITS-hi-80-conformal-lo-50',\n", - " 'NHITS-hi-80-conformal-lo-10', 'NHITS-hi-80-conformal-hi-10',\n", - " 'NHITS-hi-80-conformal-hi-50', 'NHITS-hi-80-conformal-hi-90'\n", - "]\n", - "\n", - "assert all([col in preds.columns for col in pred_cols])\n" + "nf.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)\n", + "preds = nf.predict(futr_df=AirPassengersPanel_test, level=[90])" ] } ], diff --git a/nbs/docs/tutorials/20_conformal_prediction.ipynb b/nbs/docs/tutorials/20_conformal_prediction.ipynb index 9be8dbcb2..e6b5c883d 100644 --- a/nbs/docs/tutorials/20_conformal_prediction.ipynb +++ b/nbs/docs/tutorials/20_conformal_prediction.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Conformal Prediction\n", + "# Uncertainty quantification with Conformal Prediction\n", "> Tutorial on how to train neuralforecast models and obtain prediction intervals using the conformal prediction methods" ] }, @@ -38,10 +38,9 @@ "import matplotlib.pyplot as plt\n", "from neuralforecast import NeuralForecast\n", "from neuralforecast.models import NHITS\n", - "from 
neuralforecast.models import NLinear\n", "from neuralforecast.utils import AirPassengersPanel\n", "-from neuralforecast.utils import ConformalIntervals\n", "-from neuralforecast.losses.pytorch import DistributionLoss\n" + "from neuralforecast.utils import PredictionIntervals\n", + "from neuralforecast.losses.pytorch import DistributionLoss, MAE" ] }, { @@ -60,7 +59,7 @@ "source": [ "## Data\n", "\n", - "We simply use the AirPassengers dataset for the demonstration of conformal prediction.\n" + "We use the AirPassengers dataset for the demonstration of conformal prediction.\n" ] }, { @@ -81,11 +80,14 @@ "source": [ "## Model training\n", "\n", - "We now train a NHITS model on the above dataset. To support conformal predictions, we must first instantiate the `ConformalIntervals` class and pass this to the `fit` method. By default, `ConformalIntervals` class employs `n_windows=2` for the corss-validation during the computation of conformity scores. We also train a MLP model using DistributionLoss to demonstate the difference between conformal prediction and quantiled outputs. \n", + "We now train a NHITS model on the above dataset. To support conformal predictions, we must first instantiate the `PredictionIntervals` class and pass this to the `fit` method. By default, `PredictionIntervals` class employs `n_windows=2` for the cross-validation during the computation of conformity scores. We also train a MLP model using DistributionLoss to demonstrate the difference between conformal prediction and quantiled outputs. \n", "\n", - "
\n", + "By default, `PredictionIntervals` class employs `method=conformal_distribution` for the conformal predictions, but it also supports `method=conformal_error`. The `conformal_distribution` method calculates forecast paths using the absolute errors and based on them calculates quantiles. The `conformal_error` method calculates quantiles directly from errors.\n", "\n", - "By default, `ConformalIntervals` class employs method=conformal_distribution for the conformal predictions. `method=conformal_error` is also supported. The `conformal_distribution` method calculates forecast paths using the absolute errors and based on them calculates quantiles. The `conformal_error` calculates quantiles directly from errors.\n" + "We consider two models below:\n", + "\n", + "1. A model trained using a point loss function (`MAE`), where we quantify the uncertainty using conformal prediction. This case is labeled with `NHITS`.\n", + "2. A model trained using a `DistributionLoss('Normal')`, where we quantify the uncertainty by training the model to fit the parameters of a Normal distribution. This case is labeled with `NHITS1`.\n" ] }, { @@ -102,73 +104,212 @@ ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " " - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "7e95f40e90284ad88509b20e11bde568", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Sanity Checking: | | 0/? 
[00:00" ] @@ -225,16 +377,27 @@ ], "source": [ "fig, (ax1, ax2) = plt.subplots(2, 1, figsize = (20, 7))\n", - "plot_df = pd.concat([AirPassengersPanel_train, preds]).set_index('ds')\n", + "plot_df = pd.concat([AirPassengersPanel_train, preds])\n", "\n", "plot_df = plot_df[plot_df['unique_id']=='Airline1'].drop(['unique_id','trend','y_[lag12]'], axis=1).iloc[-50:]\n", - "plot_df.drop([x for x in plot_df.columns if 'NHITS1' in x], axis=1).plot(ax=ax1, linewidth=2)\n", - "plot_df.drop([x for x in plot_df.columns if 'NHITS-' in x or x == \"NHITS\"], axis=1).plot(ax=ax2, linewidth=2)\n", "\n", + "ax1.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", + "ax1.plot(plot_df['ds'], plot_df['NHITS1'], c='blue', label='median')\n", + "ax1.fill_between(x=plot_df['ds'][-12:], \n", + " y1=plot_df['NHITS1-lo-90'][-12:].values,\n", + " y2=plot_df['NHITS1-hi-90'][-12:].values,\n", + " alpha=0.4, label='level 90')\n", "ax1.set_title('AirPassengers Forecast', fontsize=18)\n", "ax1.set_ylabel('Monthly Passengers', fontsize=15)\n", "ax1.legend(prop={'size': 10})\n", "ax1.grid()\n", + "\n", + "ax2.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n", + "ax2.plot(plot_df['ds'], plot_df['NHITS'], c='blue', label='median')\n", + "ax2.fill_between(x=plot_df['ds'][-12:], \n", + " y1=plot_df['NHITS-lo-90'][-12:].values,\n", + " y2=plot_df['NHITS-hi-90'][-12:].values,\n", + " alpha=0.4, label='level 90')\n", "ax2.set_ylabel('Monthly Passengers', fontsize=15)\n", "ax2.set_xlabel('Timestamp [t]', fontsize=15)\n", "ax2.legend(prop={'size': 10})\n", diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb index cd9497e9c..e202c9adc 100644 --- a/nbs/utils.ipynb +++ b/nbs/utils.ipynb @@ -561,7 +561,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# 5. Conformal Prediction" + "# 5. 
Prediction Intervals" ] }, { @@ -572,25 +572,20 @@ "source": [ "#| export\n", "\n", - "class ConformalIntervals:\n", - " \"\"\"Class for storing conformal intervals metadata information.\"\"\"\n", + "class PredictionIntervals:\n", + " \"\"\"Class for storing prediction intervals metadata information.\"\"\"\n", "\n", " def __init__(\n", " self,\n", " n_windows: int = 2,\n", " method: str = \"conformal_distribution\",\n", - " enable_quantiles: bool = False,\n", " ):\n", " \"\"\" \n", " n_windows : int\n", " Number of windows to evaluate.\n", " method : str, default is conformal_distribution\n", - " One of the supported methods for the computation of conformal prediction:\n", + " One of the supported methods for the computation of prediction intervals:\n", " conformal_error or conformal_distribution\n", - " enable_quantiles : bool, default is False\n", - " If set to True, we create prediction intervals on top of quantiled outputs, e.g. prediction made \n", - " with MQLoss(level=[80]) will be conformalized with the respective conformal\n", - " levels (prediction columns having 'model-lo/hi-80-conformal-lo/hi-#').\n", " \"\"\"\n", " if n_windows < 2:\n", " raise ValueError(\n", @@ -601,10 +596,9 @@ " raise ValueError(f\"method must be one of {allowed_methods}\")\n", " self.n_windows = n_windows\n", " self.method = method\n", - " self.enable_quantiles = enable_quantiles\n", "\n", " def __repr__(self):\n", - " return f\"ConformalIntervals(n_windows={self.n_windows}, method='{self.method}')\"" + " return f\"PredictionIntervals(n_windows={self.n_windows}, method='{self.method}')\"" ] }, { @@ -614,7 +608,6 @@ "outputs": [], "source": [ "#| export\n", - "\n", "def add_conformal_distribution_intervals(\n", " fcst_df: DFType, \n", " cs_df: DFType,\n", @@ -645,8 +638,8 @@ " axis=0,\n", " )\n", " quantiles = quantiles.reshape(len(cuts), -1).T\n", - " lo_cols = [f\"{model}-conformal-lo-{lv}\" for lv in reversed(level)]\n", - " hi_cols = [f\"{model}-conformal-hi-{lv}\" for lv in 
level]\n", + " lo_cols = [f\"{model}-lo-{lv}\" for lv in reversed(level)]\n", + " hi_cols = [f\"{model}-hi-{lv}\" for lv in level]\n", " out_cols = lo_cols + hi_cols\n", " fcst_df = ufp.assign_columns(fcst_df, out_cols, quantiles)\n", " return fcst_df" @@ -659,7 +652,6 @@ "outputs": [], "source": [ "#| export\n", - "\n", "def add_conformal_error_intervals(\n", " fcst_df: DFType, \n", " cs_df: DFType, \n", @@ -687,8 +679,8 @@ " axis=0,\n", " )\n", " quantiles = quantiles.reshape(len(cuts), -1)\n", - " lo_cols = [f\"{model}-conformal-lo-{lv}\" for lv in reversed(level)]\n", - " hi_cols = [f\"{model}-conformal-hi-{lv}\" for lv in level]\n", + " lo_cols = [f\"{model}-lo-{lv}\" for lv in reversed(level)]\n", + " hi_cols = [f\"{model}-hi-{lv}\" for lv in level]\n", " quantiles = np.vstack([mean - quantiles[::-1], mean + quantiles]).T\n", " columns = lo_cols + hi_cols\n", " fcst_df = ufp.assign_columns(fcst_df, columns, quantiles)\n", @@ -702,8 +694,7 @@ "outputs": [], "source": [ "#| export\n", - "\n", - "def get_conformal_method(method: str):\n", + "def get_prediction_interval_method(method: str):\n", " available_methods = {\n", " \"conformal_distribution\": add_conformal_distribution_intervals,\n", " \"conformal_error\": add_conformal_error_intervals,\n", diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py index 41ae2929f..a49b3529e 100644 --- a/neuralforecast/_modidx.py +++ b/neuralforecast/_modidx.py @@ -1444,13 +1444,7 @@ 'neuralforecast/tsdataset.py'), 'neuralforecast.tsdataset._FilesDataset.__init__': ( 'tsdataset.html#_filesdataset.__init__', 'neuralforecast/tsdataset.py')}, - 'neuralforecast.utils': { 'neuralforecast.utils.ConformalIntervals': ( 'utils.html#conformalintervals', - 'neuralforecast/utils.py'), - 'neuralforecast.utils.ConformalIntervals.__init__': ( 'utils.html#conformalintervals.__init__', - 'neuralforecast/utils.py'), - 'neuralforecast.utils.ConformalIntervals.__repr__': ( 'utils.html#conformalintervals.__repr__', - 
'neuralforecast/utils.py'), - 'neuralforecast.utils.DayOfMonth': ('utils.html#dayofmonth', 'neuralforecast/utils.py'), + 'neuralforecast.utils': { 'neuralforecast.utils.DayOfMonth': ('utils.html#dayofmonth', 'neuralforecast/utils.py'), 'neuralforecast.utils.DayOfMonth.__call__': ( 'utils.html#dayofmonth.__call__', 'neuralforecast/utils.py'), 'neuralforecast.utils.DayOfWeek': ('utils.html#dayofweek', 'neuralforecast/utils.py'), @@ -1468,6 +1462,12 @@ 'neuralforecast.utils.MonthOfYear': ('utils.html#monthofyear', 'neuralforecast/utils.py'), 'neuralforecast.utils.MonthOfYear.__call__': ( 'utils.html#monthofyear.__call__', 'neuralforecast/utils.py'), + 'neuralforecast.utils.PredictionIntervals': ( 'utils.html#predictionintervals', + 'neuralforecast/utils.py'), + 'neuralforecast.utils.PredictionIntervals.__init__': ( 'utils.html#predictionintervals.__init__', + 'neuralforecast/utils.py'), + 'neuralforecast.utils.PredictionIntervals.__repr__': ( 'utils.html#predictionintervals.__repr__', + 'neuralforecast/utils.py'), 'neuralforecast.utils.SecondOfMinute': ('utils.html#secondofminute', 'neuralforecast/utils.py'), 'neuralforecast.utils.SecondOfMinute.__call__': ( 'utils.html#secondofminute.__call__', 'neuralforecast/utils.py'), @@ -1488,9 +1488,9 @@ 'neuralforecast.utils.augment_calendar_df': ( 'utils.html#augment_calendar_df', 'neuralforecast/utils.py'), 'neuralforecast.utils.generate_series': ('utils.html#generate_series', 'neuralforecast/utils.py'), - 'neuralforecast.utils.get_conformal_method': ( 'utils.html#get_conformal_method', - 'neuralforecast/utils.py'), 'neuralforecast.utils.get_indexer_raise_missing': ( 'utils.html#get_indexer_raise_missing', 'neuralforecast/utils.py'), + 'neuralforecast.utils.get_prediction_interval_method': ( 'utils.html#get_prediction_interval_method', + 'neuralforecast/utils.py'), 'neuralforecast.utils.time_features_from_frequency_str': ( 'utils.html#time_features_from_frequency_str', 'neuralforecast/utils.py')}}} diff --git 
a/neuralforecast/core.py b/neuralforecast/core.py index 5d881d208..180a8a004 100644 --- a/neuralforecast/core.py +++ b/neuralforecast/core.py @@ -69,7 +69,7 @@ RMoK, ) from .common._base_auto import BaseAuto, MockTrial -from .utils import ConformalIntervals, get_conformal_method +from .utils import PredictionIntervals, get_prediction_interval_method # %% ../nbs/core.ipynb 5 # this disables warnings about the number of workers in the dataloaders @@ -439,7 +439,7 @@ def fit( time_col: str = "ds", target_col: str = "y", distributed_config: Optional[DistributedConfig] = None, - conformal_intervals: Optional[ConformalIntervals] = None, + prediction_intervals: Optional[PredictionIntervals] = None, ) -> None: """Fit the core.NeuralForecast. @@ -469,7 +469,7 @@ def fit( Column that contains the target. distributed_config : neuralforecast.DistributedConfig Configuration to use for DDP training. Currently only spark is supported. - conformal_intervals : ConformalIntervals, optional (default=None) + prediction_intervals : PredictionIntervals, optional (default=None) Configuration to calibrate prediction intervals (Conformal Prediction). Returns @@ -488,7 +488,7 @@ def fit( raise Exception("Set val_size>0 if early stopping is enabled.") self._cs_df: Optional[DFType] = None - self.conformal_intervals: Optional[ConformalIntervals] = None + self.prediction_intervals: Optional[PredictionIntervals] = None # Process and save new dataset (in self) if isinstance(df, (pd.DataFrame, pl_DataFrame)): @@ -546,9 +546,8 @@ def fit( "Validation set size is larger than the shorter time-series." 
) - if conformal_intervals is not None: - # conformal prediction - self.conformal_intervals = conformal_intervals + if prediction_intervals is not None: + self.prediction_intervals = prediction_intervals self._cs_df = self._conformity_scores( df=df, id_col=id_col, @@ -666,16 +665,11 @@ def _get_needed_exog(self): return futr_exog | set(hist_exog) - def _get_model_names(self, conformal=False, enable_quantiles=False) -> List[str]: + def _get_model_names(self, add_level=False) -> List[str]: names: List[str] = [] count_names = {"model": 0} for model in self.models: - if ( - conformal - and not enable_quantiles - and model.loss.outputsize_multiplier > 1 - ): - # skip prediction intervals on quantile outputs + if add_level and model.loss.outputsize_multiplier > 1: continue model_name = repr(model) @@ -808,7 +802,7 @@ def predict( sort_df: bool = True, verbose: bool = False, engine=None, - conformal_level: Optional[List[Union[int, float]]] = None, + level: Optional[List[Union[int, float]]] = None, **data_kwargs, ): """Predict with core.NeuralForecast. @@ -830,8 +824,8 @@ def predict( Print processing steps. engine : spark session Distributed engine for inference. Only used if df is a spark dataframe or if fit was called on a spark dataframe. - conformal_level : list of ints or floats, optional (default=None) - Confidence levels between 0 and 100 for conformal intervals. + level : list of ints or floats, optional (default=None) + Confidence levels between 0 and 100. data_kwargs : kwargs Extra arguments to be passed to the dataset within each model. 
@@ -973,25 +967,25 @@ def predict( _warn_id_as_idx() fcsts_df = fcsts_df.set_index(self.id_col) - # perform conformal predictions - if conformal_level is not None: - if self._cs_df is None or self.conformal_intervals is None: - warn_msg = "Please rerun the `fit` method passing a valid conformal_interval settings to compute conformity scores" - warnings.warn(warn_msg, UserWarning) + # add prediction intervals + if level is not None: + if self._cs_df is None or self.prediction_intervals is None: + raise Exception( + "You must fit the model with prediction_intervals to use level." + ) else: - level_ = sorted(conformal_level) - model_names = self._get_model_names( - conformal=True, - enable_quantiles=self.conformal_intervals.enable_quantiles, + level_ = sorted(level) + model_names = self._get_model_names(add_level=True) + prediction_interval_method = get_prediction_interval_method( + self.prediction_intervals.method ) - conformal_method = get_conformal_method(self.conformal_intervals.method) - fcsts_df = conformal_method( + fcsts_df = prediction_interval_method( fcsts_df, self._cs_df, model_names=list(model_names), level=level_, - cs_n_windows=self.conformal_intervals.n_windows, + cs_n_windows=self.prediction_intervals.n_windows, n_series=len(uids), horizon=self.h, ) @@ -1505,8 +1499,7 @@ def save( "id_col": self.id_col, "time_col": self.time_col, "target_col": self.target_col, - # conformal prediction - "conformal_intervals": self.conformal_intervals, + "prediction_intervals": self.prediction_intervals, "_cs_df": self._cs_df, # conformity score } if save_dataset: @@ -1605,7 +1598,7 @@ def load(path, verbose=False, **kwargs): for attr in ["id_col", "time_col", "target_col"]: setattr(neuralforecast, attr, config_dict[attr]) # only restore attribute if available - for attr in ["conformal_intervals", "_cs_df"]: + for attr in ["prediction_intervals", "_cs_df"]: if attr in config_dict.keys(): setattr(neuralforecast, attr, config_dict[attr]) @@ -1639,9 +1632,9 @@ def 
_conformity_scores( """Compute conformity scores. We need at least two cross validation errors to compute - quantiles for prediction intervals (`n_windows=2`, specified by self.conformal_intervals). + quantiles for prediction intervals (`n_windows=2`, specified by self.prediction_intervals). - The exception is raised by the ConformalIntervals data class. + The exception is raised by the PredictionIntervals data class. df: Optional[Union[DataFrame, SparkDataFrame, Sequence[str]]] = None, id_col: str = 'unique_id', @@ -1649,13 +1642,13 @@ def _conformity_scores( target_col: str = 'y', static_df: Optional[Union[DataFrame, SparkDataFrame]] = None, """ - if self.conformal_intervals is None: + if self.prediction_intervals is None: raise AttributeError( - "Please rerun the `fit` method passing a valid conformal_interval settings to compute conformity scores" + "Please rerun the `fit` method passing a valid prediction_interval setting to compute conformity scores" ) min_size = ufp.counts_by_id(df, id_col)["counts"].min() - min_samples = self.h * self.conformal_intervals.n_windows + 1 + min_samples = self.h * self.prediction_intervals.n_windows + 1 if min_size < min_samples: raise ValueError( "Minimum required samples in each serie for the prediction intervals " @@ -1666,7 +1659,7 @@ def _conformity_scores( cv_results = self.cross_validation( df=df, static_df=static_df, - n_windows=self.conformal_intervals.n_windows, + n_windows=self.prediction_intervals.n_windows, id_col=id_col, time_col=time_col, target_col=target_col, @@ -1674,9 +1667,7 @@ def _conformity_scores( kept = [time_col, id_col, "cutoff"] # conformity score for each model - for model in self._get_model_names( - conformal=True, enable_quantiles=self.conformal_intervals.enable_quantiles - ): + for model in self._get_model_names(add_level=True): kept.append(model) # compute absolute error for each model diff --git a/neuralforecast/utils.py b/neuralforecast/utils.py index 316b506e6..9d15ce1ca 100644 --- 
a/neuralforecast/utils.py +++ b/neuralforecast/utils.py @@ -4,8 +4,9 @@ __all__ = ['AirPassengers', 'AirPassengersDF', 'unique_id', 'ds', 'y', 'AirPassengersPanel', 'snaive', 'airline1_dummy', 'airline2_dummy', 'AirPassengersStatic', 'generate_series', 'TimeFeature', 'SecondOfMinute', 'MinuteOfHour', 'HourOfDay', 'DayOfWeek', 'DayOfMonth', 'DayOfYear', 'MonthOfYear', 'WeekOfYear', - 'time_features_from_frequency_str', 'augment_calendar_df', 'get_indexer_raise_missing', 'ConformalIntervals', - 'add_conformal_distribution_intervals', 'add_conformal_error_intervals', 'get_conformal_method'] + 'time_features_from_frequency_str', 'augment_calendar_df', 'get_indexer_raise_missing', + 'PredictionIntervals', 'add_conformal_distribution_intervals', 'add_conformal_error_intervals', + 'get_prediction_interval_method'] # %% ../nbs/utils.ipynb 3 import random @@ -451,25 +452,20 @@ def get_indexer_raise_missing(idx: pd.Index, vals: List[str]) -> List[int]: return idxs # %% ../nbs/utils.ipynb 31 -class ConformalIntervals: - """Class for storing conformal intervals metadata information.""" +class PredictionIntervals: + """Class for storing prediction intervals metadata information.""" def __init__( self, n_windows: int = 2, method: str = "conformal_distribution", - enable_quantiles: bool = False, ): """ n_windows : int Number of windows to evaluate. method : str, default is conformal_distribution - One of the supported methods for the computation of conformal prediction: + One of the supported methods for the computation of prediction intervals: conformal_error or conformal_distribution - enable_quantiles : bool, default is False - If set to True, we create prediction intervals on top of quantiled outputs, e.g. prediction made - with MQLoss(level=[80]) will be conformalized with the respective conformal - levels (prediction columns having 'model-lo/hi-80-conformal-lo/hi-#'). 
""" if n_windows < 2: raise ValueError( @@ -480,10 +476,11 @@ def __init__( raise ValueError(f"method must be one of {allowed_methods}") self.n_windows = n_windows self.method = method - self.enable_quantiles = enable_quantiles def __repr__(self): - return f"ConformalIntervals(n_windows={self.n_windows}, method='{self.method}')" + return ( + f"PredictionIntervals(n_windows={self.n_windows}, method='{self.method}')" + ) # %% ../nbs/utils.ipynb 32 def add_conformal_distribution_intervals( @@ -516,8 +513,8 @@ def add_conformal_distribution_intervals( axis=0, ) quantiles = quantiles.reshape(len(cuts), -1).T - lo_cols = [f"{model}-conformal-lo-{lv}" for lv in reversed(level)] - hi_cols = [f"{model}-conformal-hi-{lv}" for lv in level] + lo_cols = [f"{model}-lo-{lv}" for lv in reversed(level)] + hi_cols = [f"{model}-hi-{lv}" for lv in level] out_cols = lo_cols + hi_cols fcst_df = ufp.assign_columns(fcst_df, out_cols, quantiles) return fcst_df @@ -550,15 +547,15 @@ def add_conformal_error_intervals( axis=0, ) quantiles = quantiles.reshape(len(cuts), -1) - lo_cols = [f"{model}-conformal-lo-{lv}" for lv in reversed(level)] - hi_cols = [f"{model}-conformal-hi-{lv}" for lv in level] + lo_cols = [f"{model}-lo-{lv}" for lv in reversed(level)] + hi_cols = [f"{model}-hi-{lv}" for lv in level] quantiles = np.vstack([mean - quantiles[::-1], mean + quantiles]).T columns = lo_cols + hi_cols fcst_df = ufp.assign_columns(fcst_df, columns, quantiles) return fcst_df # %% ../nbs/utils.ipynb 34 -def get_conformal_method(method: str): +def get_prediction_interval_method(method: str): available_methods = { "conformal_distribution": add_conformal_distribution_intervals, "conformal_error": add_conformal_error_intervals,