diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 7f85c0cc2..50f194e51 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -14,3 +14,8 @@ replace = __version__ = "{new_version}" [bumpversion:file:doc/conf.py] search = version = "{current_version}" replace = version = "{new_version}" + +[bumpversion:file:CITATION.cff] +search = version: {current_version} +replace = version: {new_version} + diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3487df6b..4298a96f1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: python-version: "3.11" numpy-version: 1.25.2 - os: macos-latest - python-version: "3.11" + python-version: "3.10" numpy-version: 1.25.2 defaults: run: @@ -48,6 +48,7 @@ jobs: python-version: ${{ matrix.python-version }} environment-file: environment.ci.yml channels: defaults, conda-forge + miniconda-version: "latest" - name: Install numpy run: conda install numpy=${{ matrix.numpy-version }} - name: Check linting diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..e22cd764d --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,48 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Cordier" + given-names: "Thibault" + orcid: "https://orcid.org/0000-0000-0000-0000" +title: "MAPIE - Model Agnostic Prediction Interval Estimator" +version: 0.8.3 +date-released: 2019-04-30 +url: "https://github.com/scikit-learn-contrib/MAPIE" +preferred-citation: + type: conference-paper + title: "Flexible and Systematic Uncertainty Estimation with Conformal Prediction via the MAPIE library" + authors: + - family-names: "Cordier" + given-names: "Thibault" + - family-names: "Blot" + given-names: "Vincent" + - family-names: "Lacombe" + given-names: "Louis" + - family-names: "Morzadec" + given-names: "Thomas" + - family-names: "Capitaine" + given-names: "Arnaud" + - family-names: "Brunel" + given-names: "Nicolas" + collection-title: "Conformal and Probabilistic Prediction with Applications" + collection-type: proceedings + pages: "549--581" + year: 2023 + organization: "PMLR" +old-citation: + type: article + authors: + - family-names: "Taquet" + given-names: "Vianney" + - family-names: "Blot" + given-names: "Vincent" + - family-names: "Morzadec" + given-names: "Thomas" + - family-names: "Lacombe" + given-names: "Louis" + - family-names: "Brunel" + given-names: "Nicolas" + doi: "10.48550/arXiv.2207.12274" + journal: "arXiv preprint arXiv:2207.12274" + title: "MAPIE: an open-source library for distribution-free uncertainty quantification" + year: 2021 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 59122bcbe..7563cc3c3 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -60,7 +60,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -tcordier@quantmetry.com. +thibault.a.cordier@capgemini.com. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the diff --git a/HISTORY.rst b/HISTORY.rst index f2faa2e8d..a2ac2db36 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,16 @@ History ======= +0.8.3 (2024-**-**) +------------------ + +* Fix conda versioning. +* Reduce precision for test in `MapieCalibrator`. +* Fix invalid certificate when downloading data. +* Add citations utility to the documentation. 
+* Add documentation for metrics. +* Add explanation and example for symmetry argument in CQR. + 0.8.3 (2024-03-01) ------------------ @@ -14,7 +24,7 @@ History 0.8.2 (2024-01-11) ------------------ -* * Resolve issue still present in 0.8.1 by updating pandas. +* Resolve issue still present in 0.8.1 by updating pandas. 0.8.1 (2024-01-11) ------------------ diff --git a/README.rst b/README.rst index 3d6478c53..67d0db068 100644 --- a/README.rst +++ b/README.rst @@ -1,38 +1,39 @@ .. -*- mode: rst -*- -|GitHubActions|_ |Codecov|_ |ReadTheDocs|_ |License|_ |PythonVersion|_ |PyPi|_ |Conda|_ |Release|_ |Commits|_ |DOI|_ +|GitHubActions| |Codecov| |ReadTheDocs| |License| |PythonVersion| |PyPi| |Conda| |Release| |Commits| |DOI| .. |GitHubActions| image:: https://github.com/scikit-learn-contrib/MAPIE/actions/workflows/test.yml/badge.svg -.. _GitHubActions: https://github.com/scikit-learn-contrib/MAPIE/actions + :target: https://github.com/scikit-learn-contrib/MAPIE/actions .. |Codecov| image:: https://codecov.io/gh/scikit-learn-contrib/MAPIE/branch/master/graph/badge.svg?token=F2S6KYH4V1 -.. _Codecov: https://codecov.io/gh/scikit-learn-contrib/MAPIE + :target: https://codecov.io/gh/scikit-learn-contrib/MAPIE -.. |ReadTheDocs| image:: https://readthedocs.org/projects/mapie/badge -.. _ReadTheDocs: https://mapie.readthedocs.io/en/latest +.. |ReadTheDocs| image:: https://readthedocs.org/projects/mapie/badge/?version=stable + :target: https://mapie.readthedocs.io/en/stable/?badge=stable + :alt: Documentation Status -.. |License| image:: https://img.shields.io/github/license/simai-ml/MAPIE -.. _License: https://github.com/scikit-learn-contrib/MAPIE/blob/master/LICENSE +.. |License| image:: https://img.shields.io/github/license/scikit-learn-contrib/MAPIE + :target: https://github.com/scikit-learn-contrib/MAPIE/blob/master/LICENSE .. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/mapie -.. _PythonVersion: https://pypi.org/project/mapie/ + :target: https://pypi.org/project/mapie/ .. |PyPi| image:: https://img.shields.io/pypi/v/mapie -.. _PyPi: https://pypi.org/project/mapie/ + :target: https://pypi.org/project/mapie/ .. |Conda| image:: https://img.shields.io/conda/vn/conda-forge/mapie -.. _Conda: https://anaconda.org/conda-forge/mapie + :target: https://anaconda.org/conda-forge/mapie .. |Release| image:: https://img.shields.io/github/v/release/scikit-learn-contrib/mapie -.. _Release: https://github.com/scikit-learn-contrib/MAPIE/releases + :target: https://github.com/scikit-learn-contrib/MAPIE/releases .. |Commits| image:: https://img.shields.io/github/commits-since/scikit-learn-contrib/mapie/latest/master -.. _Commits: https://github.com/scikit-learn-contrib/MAPIE/commits/master + :target: https://github.com/scikit-learn-contrib/MAPIE/commits/master .. |DOI| image:: https://img.shields.io/badge/10.48550/arXiv.2207.12274-B31B1B.svg -.. _DOI: https://arxiv.org/abs/2207.12274 + :target: https://arxiv.org/abs/2207.12274 -.. image:: https://github.com/simai-ml/MAPIE/raw/master/doc/images/mapie_logo_nobg_cut.png +.. image:: https://github.com/scikit-learn-contrib/MAPIE/raw/master/doc/images/mapie_logo_nobg_cut.png :width: 400 :align: center @@ -157,7 +158,7 @@ The full documentation can be found `on this link `_ so that we can align on the work to be done. +We encourage you to `open an issue `_ so that we can align on the work to be done. It is generally a good idea to have a quick discussion before opening a pull request that is potentially out-of-scope. 
For more information on the contribution process, please go `here `_. @@ -168,27 +169,32 @@ For more information on the contribution process, please go `here `_. +MAPIE is free and open-source software licensed under the `3-clause BSD license `_. + + +📚 Citation +=========== + +If you use MAPIE in your research, please cite using `citations file `_ on our repository. diff --git a/doc/index.rst b/doc/index.rst index d3b00dc18..b5450722b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -58,6 +58,13 @@ examples_calibration/index notebooks_calibration +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: METRICS + + theoretical_description_metrics + .. toctree:: :maxdepth: 2 :hidden: diff --git a/doc/notebooks_classification.rst b/doc/notebooks_classification.rst index dc25e1ac2..35747de19 100755 --- a/doc/notebooks_classification.rst +++ b/doc/notebooks_classification.rst @@ -6,8 +6,8 @@ problems for computer vision settings that are too heavy to be included in the e galleries. -1. Estimating prediction sets on the Cifar10 dataset : `notebook `_ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- +1. Estimating prediction sets on the Cifar10 dataset : `cifar_notebook `_ +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -2. Top-label calibration for outputs of ML models : `notebook `_ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +2. Top-label calibration for outputs of ML models : `top_label_notebook `_ +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ diff --git a/doc/notebooks_multilabel_classification.rst b/doc/notebooks_multilabel_classification.rst index e9160169b..3826f7ff2 100644 --- a/doc/notebooks_multilabel_classification.rst +++ b/doc/notebooks_multilabel_classification.rst @@ -5,8 +5,8 @@ The following examples present advanced analyses on multi-label classification problems with different methods proposed in MAPIE. -1. Overview of Recall Control for Multi-Label Classification : `notebook `_ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +1. Overview of Recall Control for Multi-Label Classification : `recall_notebook `_ +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -2. Overview of Precision Control for Multi-Label Classification : `notebook `_ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \ No newline at end of file +2. 
Overview of Precision Control for Multi-Label Classification : `precision_notebook `_ +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \ No newline at end of file diff --git a/doc/notebooks_regression.rst b/doc/notebooks_regression.rst index 4ac493fa8..24b8ce12e 100755 --- a/doc/notebooks_regression.rst +++ b/doc/notebooks_regression.rst @@ -8,11 +8,11 @@ This section lists a series of Jupyter notebooks hosted on the MAPIE Github repo ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -2. Estimating the uncertainties in the exoplanet masses : `notebook `_ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +2. Estimating the uncertainties in the exoplanet masses : `exoplanet_notebook `_ +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -3. Estimating prediction intervals for time series forecast with EnbPI and ACI : `notebook `_ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +3. Estimating prediction intervals for time series forecast with EnbPI and ACI : `ts_notebook `_ +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/doc/quick_start.rst b/doc/quick_start.rst index 31e2efa97..3754f5ff5 100644 --- a/doc/quick_start.rst +++ b/doc/quick_start.rst @@ -7,11 +7,9 @@ In regression settings, **MAPIE** provides prediction intervals on single-output In classification settings, **MAPIE** provides prediction sets on multi-class data. In any case, **MAPIE** is compatible with any scikit-learn-compatible estimator. -Estimate your prediction intervals -================================== 1. Download and install the module ----------------------------------- +================================== Install via ``pip``: @@ -33,7 +31,7 @@ To install directly from the github repository : 2. Run MapieRegressor ---------------------- +===================== Let us start with a basic regression problem. Here, we generate one-dimensional noisy data that we fit with a linear model. @@ -114,8 +112,8 @@ It is given by the alpha parameter defined in ``MapieRegressor``, here equal to thus giving target coverages of ``0.95`` and ``0.68``. The effective coverage is the actual fraction of true labels lying in the prediction intervals. -2. Run MapieClassifier ----------------------- +3. Run MapieClassifier +======================= Similarly, it's possible to do the same for a basic classification problem. diff --git a/doc/theoretical_description_binary_classification.rst b/doc/theoretical_description_binary_classification.rst index 3ca8b3b77..9c8f6f336 100644 --- a/doc/theoretical_description_binary_classification.rst +++ b/doc/theoretical_description_binary_classification.rst @@ -1,10 +1,10 @@ -.. 
title:: Theoretical Description : contents +.. title:: Theoretical Description Binary Classification : contents .. _theoretical_description_binay_classification: -======================= +####################### Theoretical Description -======================= +####################### There are mainly three different ways to handle uncertainty quantification in binary classification: calibration (see :doc:`theoretical_description_calibration`), confidence interval (CI) for the probability @@ -12,7 +12,7 @@ calibration (see :doc:`theoretical_description_calibration`), confidence interva These 3 notions are tightly related for score-based classifier, as it is shown in [1]. Prediction sets can be computed in the same way for multiclass and binary classification with -:class:`~mapie.calibration.MapieClassifier`, and there are the same theoretical guarantees. +:class:`~mapie.classification.MapieClassifier`, and there are the same theoretical guarantees. Nevertheless, prediction sets are often much less informative in the binary case than in the multiclass case. From Gupta et al [1]: @@ -83,8 +83,8 @@ for the labels of test objects which are guaranteed to be well-calibrated under that the observations are generated independently from the same distribution [2]. -4. References -------------- +References +---------- [1] Gupta, Chirag, Aleksandr Podkopaev, and Aaditya Ramdas. "Distribution-free binary classification: prediction sets, confidence intervals, and calibration." diff --git a/doc/theoretical_description_calibration.rst b/doc/theoretical_description_calibration.rst index 21df15f2d..c62540337 100644 --- a/doc/theoretical_description_calibration.rst +++ b/doc/theoretical_description_calibration.rst @@ -2,10 +2,9 @@ .. _theoretical_description_calibration: -======================= +####################### Theoretical Description -======================= - +####################### One method for multi-class calibration has been implemented in MAPIE so far : Top-Label Calibration [1]. @@ -34,8 +33,8 @@ To apply calibration directly to a multi-class context, Gupta et al. propose a f a multi-class calibration to multiple binary calibrations (M2B). -1. Top-Label ------------- +Top-Label +--------- Top-Label calibration is a calibration technique introduced by Gupta et al. to calibrate the model according to the highest score and the corresponding class (see [1] Section 2). This framework offers to apply binary calibration techniques to multi-class calibration. @@ -50,109 +49,8 @@ according to Top-Label calibration if: Pr(Y = c(X) \mid h(X), c(X)) = h(X) -2. Metrics for calibration --------------------------- - -**Expected calibration error** - -The main metric to check if the calibration is correct is the Expected Calibration Error (ECE). It is based on two -components, accuracy and confidence per bin. The number of bins is a hyperparamater :math:`M`, and we refer to a specific bin by -:math:`B_m`. - -.. math:: - \text{acc}(B_m) &= \frac{1}{\left| B_m \right|} \sum_{i \in B_m} {y}_i \\ - \text{conf}(B_m) &= \frac{1}{\left| B_m \right|} \sum_{i \in B_m} \hat{f}(x)_i - - -The ECE is the combination of these two metrics combined. - -.. math:: - \text{ECE} = \sum_{m=1}^M \frac{\left| B_m \right|}{n} \left| acc(B_m) - conf(B_m) \right| - -In simple terms, once all the different bins from the confidence scores have been created, we check the mean accuracy of each bin. -The absolute mean difference between the two is the ECE. Hence, the lower the ECE, the better the calibration was performed. 
- -**Top-Label ECE** - -In the top-label calibration, we only calculate the ECE for the top-label class. Hence, per top-label class, we condition the calculation -of the accuracy and confidence based on the top label and take the average ECE for each top-label. - -3. Statistical tests for calibration ------------------------------------- - -**Kolmogorov-Smirnov test** - -Kolmogorov-Smirnov test was derived in [2, 3, 4]. The idea is to consider the cumulative differences between sorted scores :math:`s_i` -and their corresponding labels :math:`y_i` and to compare its properties to that of a standard Brownian motion. Let us consider the -cumulative differences on sorted scores: - -.. math:: - C_k = \frac{1}{N}\sum_{i=1}^k (s_i - y_i) - -We also introduce a typical normalization scale :math:`\sigma`: - -.. math:: - \sigma = \frac{1}{N}\sqrt{\sum_{i=1}^N s_i(1 - s_i)} - -The Kolmogorov-Smirnov statistic is then defined as : - -.. math:: - G = \max|C_k|/\sigma - -It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) -converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form -formulas for the cumulative distribution function (CDF) of the maximum absolute value of such a standard Brownian motion. -So we state the p-value associated to the statistical test of well calibration as: - -.. math:: - p = 1 - CDF(G) - -**Kuiper test** - -Kuiper test was derived in [2, 3, 4] and is very similar to Kolmogorov-Smirnov. This time, the statistic is defined as: - -.. math:: - H = (\max_k|C_k| - \min_k|C_k|)/\sigma - -It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) -converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form -formulas for the cumulative distribution function (CDF) of the range of such a standard Brownian motion. -So we state the p-value associated to the statistical test of well calibration as: - -.. math:: - p = 1 - CDF(H) - -**Spiegelhalter test** - -Spiegelhalter test was derived in [6]. It is based on a decomposition of the Brier score: - -.. math:: - B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)^2 - -where scores are denoted :math:`s_i` and their corresponding labels :math:`y_i`. This can be decomposed in two terms: - -.. math:: - B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)(1 - 2s_i) + \frac{1}{N}\sum_{i=1}^N s_i(1 - s_i) - -It can be shown that the first term has an expected value of zero under the null hypothesis of well calibration. So we interpret -the second term as the Brier score expected value :math:`E(B)` under the null hypothesis. As for the variance of the Brier score, it can be -computed as: - -.. math:: - Var(B) = \frac{1}{N^2}\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i) - -So we can build a Z-score as follows: - -.. math:: - Z = \frac{B - E(B)}{\sqrt{Var(B)}} = \frac{\sum_{i=1}^N(y_i - s_i)(1 - 2s_i)}{\sqrt{\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i)}} - -This statistic follows a normal distribution of cumulative distribution CDF so that we state the associated p-value: - -.. math:: - p = 1 - CDF(Z) - -3. References -------------- +References +---------- [1] Gupta, Chirag, and Aaditya K. Ramdas. "Top-label calibration and multiclass-to-binary reductions." @@ -171,8 +69,7 @@ arXiv preprint arXiv:2202.00100. [4] D. A. Darling. A. J. F. Siegert. 
The First Passage Problem for a Continuous Markov Process. -Ann. Math. Statist. 24 (4) 624 - 639, December, -1953. +Ann. Math. Statist. 24 (4) 624 - 639, December, 1953. [5] William Feller. The Asymptotic Distribution of the Range of Sums of diff --git a/doc/theoretical_description_classification.rst b/doc/theoretical_description_classification.rst index aa5c08060..445fcfe42 100644 --- a/doc/theoretical_description_classification.rst +++ b/doc/theoretical_description_classification.rst @@ -1,11 +1,10 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Classification : contents .. _theoretical_description_classification: -======================= +####################### Theoretical Description -======================= - +####################### Three methods for multi-class uncertainty quantification have been implemented in MAPIE so far : LAC (that stands for Least Ambiguous set-valued Classifier) [1], Adaptive Prediction Sets [2, 3] and Top-K [3]. @@ -141,8 +140,10 @@ Despite the RAPS method having a relatively small set size, its coverage tends t of the last label in the prediction set. This randomization is done as follows: - First : define the :math:`V` parameter: + .. math:: V_i = (s_i(X_i, Y_i) - \hat{q}_{1-\alpha}) / \left(\hat{\mu}(X_i)_{\pi_k} + \lambda \mathbb{1} (k > k_{reg})\right) + - Compare each :math:`V_i` to :math:`U \sim` Unif(0, 1) - If :math:`V_i \leq U`, the last included label is removed, else we keep the prediction set as it is. @@ -227,8 +228,8 @@ where : .. TO BE CONTINUED -5. References -------------- +References +---------- [1] Mauricio Sadinle, Jing Lei, & Larry Wasserman. "Least Ambiguous Set-Valued Classifiers With Bounded Error Levels." diff --git a/doc/theoretical_description_conformity_scores.rst b/doc/theoretical_description_conformity_scores.rst index b280fc530..5ec0aee4d 100644 --- a/doc/theoretical_description_conformity_scores.rst +++ b/doc/theoretical_description_conformity_scores.rst @@ -1,10 +1,10 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Conformity Scores : contents .. _theoretical_description_conformity_scores: -============================================= +############################################# Theoretical Description for Conformity Scores -============================================= +############################################# The :class:`mapie.conformity_scores.ConformityScore` class implements various methods to compute conformity scores for regression. @@ -25,7 +25,7 @@ quantiles will be computed : one on the right side of the distribution and the other on the left side. 1. The absolute residual score -============================== +------------------------------ The absolute residual score (:class:`mapie.conformity_scores.AbsoluteConformityScore`) is the simplest and most commonly used conformal score, it translates the error @@ -44,7 +44,7 @@ With this score, the intervals of predictions will be constant over the whole da This score is by default symmetric (*see above for definition*). 2. The gamma score -================== +------------------ The gamma score [2] (:class:`mapie.conformity_scores.GammaConformityScore`) adds a notion of adaptivity with the normalization of the residuals by the predictions. @@ -69,7 +69,7 @@ the order of magnitude of the predictions, implying that this score should be us in use cases where we want greater uncertainty when the prediction is high. 3. 
The residual normalized score -======================================= +-------------------------------- The residual normalized score [1] (:class:`mapie.conformity_scores.ResidualNormalisedScore`) is slightly more complex than the previous scores. @@ -97,7 +97,7 @@ it is not proportional to the uncertainty. Key takeaways -============= +------------- - The absolute residual score is the basic conformity score and gives constant intervals. It is the one used by default by :class:`mapie.regression.MapieRegressor`. - The gamma conformity score adds a notion of adaptivity by giving intervals of different sizes @@ -107,7 +107,7 @@ Key takeaways without specific assumptions on the data. References -========== +---------- [1] Lei, J., G'Sell, M., Rinaldo, A., Tibshirani, R. J., & Wasserman, L. (2018). Distribution-Free Predictive Inference for Regression. Journal of the American Statistical Association, 113(523), 1094–1111. diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst new file mode 100644 index 000000000..398fdd7bb --- /dev/null +++ b/doc/theoretical_description_metrics.rst @@ -0,0 +1,318 @@ +.. title:: Theoretical Description Metrics : contents + +.. _theoretical_description_metrics: + +####################### +Theoretical Description +####################### + +This document provides detailed descriptions of various metrics used to evaluate the performance of predictive models, particularly focusing on their ability to estimate uncertainties and calibrate predictions accurately. + +1. General Metrics +================== + +Regression Coverage Score +------------------------- + +The **Regression Coverage Score (RCS)** calculates the fraction of true outcomes that fall within the provided prediction intervals. + +.. math:: + + RCS = \frac{1}{n} \sum_{i=1}^{n} \mathbf{1}(\hat y^{\text{low}}_{i} \leq y_{i} \leq \hat y^{\text{up}}_{i}) + +where: + +- :math:`n` is the number of samples, +- :math:`y_{i}` is the true value for the :math:`i`-th sample, +- :math:`\hat y^{\text{low}}_{i}` and :math:`\hat y^{\text{up}}_{i}` are the lower and upper bounds of the prediction intervals, respectively. + +Regression Mean Width Score +--------------------------- + +The **Regression Mean Width Score (RMWS)** assesses the average width of the prediction intervals provided by the model. + +.. math:: + + \text{RMWS} = \frac{1}{n} \sum_{i=1}^{n} (\hat y^{\text{up}}_{i} - \hat y^{\text{low}}_{i}) + +Classification Coverage Score +----------------------------- + +The **Classification Coverage Score (CCS)** measures how often the true class labels fall within the predicted sets. + +.. math:: + + CCS = \frac{1}{n} \sum_{i=1}^{n} \mathbf{1}(y_{i} \in \hat C(x_{i})) + +Here, :math:`\hat C(x_{i})` represents the set of predicted labels that could possibly contain the true label for the :math:`i`-th observation :math:`x_{i}`. + +Classification Mean Width Score +------------------------------- + +For classification tasks, the **Classification Mean Width Score (CMWS)** calculates the average size of the prediction sets across all samples. + +.. math:: + + \text{CMWS} = \frac{1}{n} \sum_{i=1}^{n} |\hat C(x_i)| + +where :math:`|\hat C(x_i)|` denotes the number of classes included in the prediction set for sample :math:`i`. + +Size-Stratified Coverage +------------------------- + +**Size-Stratified Coverage (SSC)** evaluates how the size of prediction sets or intervals affects their ability to cover the true outcomes [1]. 
It's calculated separately for classification and regression: + +**Regression:** + +.. math:: + + \text{SSC}_{\text{regression}} = \sum_{k=1}^{K} \left( \frac{1}{|I_k|} \sum_{i \in I_k} \mathbf{1}(\hat y^{\text{low}}_{i} \leq y_{i} \leq \hat y^{\text{up}}_{i}) \right) + +**Classification:** + +.. math:: + + \text{SSC}_{\text{classification}} = \sum_{k=1}^{K} \left( \frac{1}{|S_k|} \sum_{i \in S_k} \mathbf{1}(y_{i} \in \hat C(x_i)) \right) + +where: + +- :math:`K` is the number of distinct size groups, +- :math:`I_k` and :math:`S_k` are the indices of samples whose prediction intervals or sets belong to the :math:`k`-th size group. + +Hilbert-Schmidt Independence Criterion +--------------------------------------- + +The **Hilbert-Schmidt Independence Criterion (HSIC)** is a non-parametric measure of independence between two variables, applied here to test the independence of interval sizes from their coverage indicators [4]. + +.. math:: + + \text{HSIC} = \operatorname{trace}(\mathbf{H} \mathbf{K} \mathbf{H} \mathbf{L}) + +where: + +- :math:`\mathbf{K}` and :math:`\mathbf{L}` are the kernel matrices representing the interval sizes and coverage indicators, respectively. +- :math:`\mathbf{H}` is the centering matrix, :math:`\mathbf{H} = \mathbf{I} - \frac{1}{n} \mathbf{11}^\top`. + +This measure is crucial for determining whether certain sizes of prediction intervals are systematically more or less likely to contain the true values, which can highlight biases in interval-based predictions. + +Coverage Width-Based Criterion +------------------------------ + +The **Coverage Width-Based Criterion (CWC)** evaluates prediction intervals by balancing their empirical coverage and width. It is designed to both reward narrow intervals and penalize those that do not achieve a specified coverage probability [6]. + +.. math:: + + \text{CWC} = (1 - \text{Mean Width Score}) \times \exp\left(-\eta \times (\text{Coverage Score} - (1-\alpha))^2\right) + +Mean Winkler Interval Score +--------------------------- + +The **Mean Winkler Interval (MWI) Score** evaluates prediction intervals by combining their width with a penalty for intervals that do not contain the observation [8, 10]. + +.. math:: + + \text{MWI Score} = \frac{1}{n} \sum_{i=1}^{n} (\hat y^{\text{up}}_{i} - \hat y^{\text{low}}_{i}) + \frac{2}{\alpha} \sum_{i=1}^{n} \max(0, |y_{i} - \hat y^{\text{boundary}}_{i}|) + +where :math:`\hat y^{\text{boundary}}_{i}` is the nearest interval boundary not containing :math:`y_{i}`, and :math:`\alpha` is the significance level. + +2. Calibration Metrics +====================== + + +Expected Calibration Error +-------------------------- + +The **Expected Calibration Error** (ECE) is a metric used to evaluate how well the predicted probabilities of a model align with the actual outcomes. It measures the difference between predicted confidence levels and actual accuracy. The process involves dividing the predictions into bins based on confidence scores and then comparing the accuracy within each bin to the average confidence level of the predictions in that bin. The number of bins is a hyperparameter :math:`M`, and we refer to a specific bin by :math:`B_m`. + +For each bin :math:`B_m`, the accuracy and confidence are defined as follows: + +.. math:: + + \text{acc}(B_m) = \frac{1}{\left| B_m \right|} \sum_{i \in B_m} y_i + +.. math:: + + \text{conf}(B_m) = \frac{1}{\left| B_m \right|} \sum_{i \in B_m} \hat{f}(x_i) + +The ECE is then calculated using the following formula: + +.. 
math:: + + \text{ECE} = \sum_{m=1}^M \frac{\left| B_m \right|}{n} \left| \text{acc}(B_m) - \text{conf}(B_m) \right| + +where: + +- :math:`B_m` is the set of indices of samples that fall into the :math:`m`-th bin. +- :math:`\left| B_m \right|` is the number of samples in the :math:`m`-th bin. +- :math:`n` is the total number of samples. +- :math:`\text{acc}(B_m)` is the accuracy within the :math:`m`-th bin. +- :math:`\text{conf}(B_m)` is the average confidence score within the :math:`m`-th bin. + +In simple terms, once the different bins from the confidence scores have been created, we check the mean accuracy of each bin. The absolute mean difference between the two is the ECE. Hence, the lower the ECE, the better the calibration was performed. The difference between the average confidence and the actual accuracy within each bin is weighted by the proportion of samples in that bin, ensuring that bins with more samples have a larger influence on the final ECE value. + +Top-Label Expected Calibration Error (Top-Label ECE) +---------------------------------------------------- + +The **Top-Label Expected Calibration Error** (Top-Label ECE) extends the concept of ECE to the multi-class setting. Instead of evaluating calibration over all predicted probabilities, Top-Label ECE focuses on the calibration of the most confident prediction (top-label) for each sample. For the top-label class, the calculation of the accuracy and confidence is conditioned on the top label, and the average ECE is taken for each top-label. + +The Top-Label ECE is calculated as follows: + +.. math:: + + \text{Top-Label ECE} = \frac{1}{L} \sum_{j=1}^L \sum_{i=1}^B \frac{|B_{i,j}|}{n_j} \left| \text{acc}(B_{i,j}) - \text{conf}(B_{i,j}) \right| + +where: + +- :math:`L` is the number of unique labels. +- :math:`B_{i,j}` is the set of indices of samples that fall into the :math:`i`-th bin for label :math:`j`. +- :math:`\left| B_{i,j} \right|` is the number of samples in the :math:`i`-th bin for label :math:`j`. +- :math:`n_j` is the total number of samples for label :math:`j`. +- :math:`\text{acc}(B_{i,j})` is the accuracy within the :math:`i`-th bin for label :math:`j`. +- :math:`\text{conf}(B_{i,j})` is the average confidence score within the :math:`i`-th bin for label :math:`j`. +- :math:`B` is the total number of bins. + +For each label, the predictions are binned according to their confidence scores for that label. The calibration error is then calculated for each label separately and averaged across all labels to obtain the final Top-Label ECE value. This ensures that the calibration is measured specifically for the most confident prediction, which is often the most critical for decision-making in multi-class problems. + +Cumulative Differences +---------------------- + +**Cumulative Differences** calculates the cumulative differences between sorted true values and prediction scores, helping to understand how well the prediction scores correspond to the actual outcomes when both are ordered by the score [2]. + +.. math:: + + \text{Cumulative Differences} = \frac{1}{n} \sum_{i=1}^{n} (y_{\sigma_1(i)} - \hat y_{\sigma_2(i)}) + +where: + +- :math:`\sigma_1` is the permutation which sorts all the true values. +- :math:`\sigma_2` is the permutation which sorts all the predicted values. + +Kolmogorov-Smirnov Statistic for Calibration +-------------------------------------------- + +The **Kolmogorov-Smirnov test** was derived in [2, 3, 11]. 
The idea is to consider the cumulative differences between sorted scores :math:`s_i` +and their corresponding labels :math:`y_i` and to compare its properties to that of a standard Brownian motion. Let us consider the +cumulative differences on sorted scores: + +.. math:: + C_k = \frac{1}{N}\sum_{i=1}^k (s_i - y_i) + +We also introduce a typical normalization scale :math:`\sigma`: + +.. math:: + \sigma = \frac{1}{N}\sqrt{\sum_{i=1}^N s_i(1 - s_i)} + +The Kolmogorov-Smirnov statistic is then defined as : + +.. math:: + G = \max|C_k|/\sigma + +It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) +converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 11] also provide closed-form +formulas for the cumulative distribution function (CDF) of the maximum absolute value of such a standard Brownian motion. +So we state the p-value associated to the statistical test of well calibration as: + +.. math:: + p = 1 - CDF(G) + +Kuiper's Test +------------- + +The **Kuiper test** was derived in [2, 3, 11] and is very similar to Kolmogorov-Smirnov. This time, the statistic is defined as: + +.. math:: + H = (\max_k|C_k| - \min_k|C_k|)/\sigma + +It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) +converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 11] also provide closed-form +formulas for the cumulative distribution function (CDF) of the range of such a standard Brownian motion. +So we state the p-value associated to the statistical test of well calibration as: + +.. math:: + p = 1 - CDF(H) + +Spiegelhalter’s Test +-------------------- + +The **Spiegelhalter test** was derived in [9]. It is based on a decomposition of the Brier score: + +.. math:: + B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)^2 + +where scores are denoted :math:`s_i` and their corresponding labels :math:`y_i`. This can be decomposed in two terms: + +.. math:: + B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)(1 - 2s_i) + \frac{1}{N}\sum_{i=1}^N s_i(1 - s_i) + +It can be shown that the first term has an expected value of zero under the null hypothesis of well calibration. So we interpret +the second term as the Brier score expected value :math:`E(B)` under the null hypothesis. As for the variance of the Brier score, it can be +computed as: + +.. math:: + Var(B) = \frac{1}{N^2}\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i) + +So we can build a Z-score as follows: + +.. math:: + Z = \frac{B - E(B)}{\sqrt{Var(B)}} = \frac{\sum_{i=1}^N(y_i - s_i)(1 - 2s_i)}{\sqrt{\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i)}} + +This statistic follows a normal distribution of cumulative distribution CDF so that we state the associated p-value: + +.. math:: + p = 1 - CDF(Z) + + +References +========== + +[1] Angelopoulos, A. N., & Bates, S. (2021). +A gentle introduction to conformal prediction and +distribution-free uncertainty quantification. +arXiv preprint arXiv:2107.07511. + +[2] Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C. +Metrics of calibration for probabilistic predictions. +The Journal of Machine Learning Research. 2022 Jan 1;23(1):15886-940. + +[3] D. A. Darling. A. J. F. Siegert. +The First Passage Problem for a Continuous Markov Process. +Ann. Math. Statist. 24 (4) 624 - 639, December, 1953. + +[4] Feldman, S., Bates, S., & Romano, Y. (2021). 
+Improving conditional coverage via orthogonal quantile regression. +Advances in Neural Information Processing Systems, 34, 2060-2071. + +[5] Gupta, Chirag, and Aaditya K. Ramdas. +"Top-label calibration and multiclass-to-binary reductions." +arXiv preprint arXiv:2107.08353 (2021). + +[6] Khosravi, Abbas, Saeid Nahavandi, and Doug Creighton. +"Construction of optimal prediction intervals for load forecasting +problems." +IEEE Transactions on Power Systems 25.3 (2010): 1496-1503. + +[7] Naeini, Mahdi Pakdaman, Gregory Cooper, and Milos Hauskrecht. +"Obtaining well calibrated probabilities using bayesian binning." +Twenty-Ninth AAAI Conference on Artificial Intelligence. 2015. + +[8] Robert L. Winkler +"A Decision-Theoretic Approach to Interval Estimation", +Journal of the American Statistical Association, +volume 67, pages 187-191 (1972) +(https://doi.org/10.1080/01621459.1972.10481224) + +[9] Spiegelhalter DJ. +Probabilistic prediction in patient management and clinical trials. +Statistics in medicine. +1986 Sep;5(5):421-33. + +[10] Tilmann Gneiting and Adrian E Raftery +"Strictly Proper Scoring Rules, Prediction, and Estimation", +Journal of the American Statistical Association, +volume 102, pages 359-378 (2007) +(https://doi.org/10.1198/016214506000001437) (Section 6.2) + +[11] Tygert M. +Calibration of P-values for calibration and for deviation +of a subpopulation from the full population. +arXiv preprint arXiv:2202.00100.2022 Jan 31. diff --git a/doc/theoretical_description_multilabel_classification.rst b/doc/theoretical_description_multilabel_classification.rst index 23e0536c4..e3ff05da3 100644 --- a/doc/theoretical_description_multilabel_classification.rst +++ b/doc/theoretical_description_multilabel_classification.rst @@ -1,11 +1,10 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Multi label Classification : contents .. _theoretical_description_multilabel_classification: -======================= +####################### Theoretical Description -======================= - +####################### Three methods for multi-label uncertainty quantification have been implemented in MAPIE so far : Risk-Controlling Prediction Sets (RCPS) [1], Conformal Risk Control (CRC) [2] and Learn Then Test (LTT) [3]. @@ -38,7 +37,7 @@ Notice that at the opposite of the other two methods, LTT allows to control any we use CRC and RCPS for recall control and LTT for precision control. 1. Risk-Controlling Prediction Sets ------------------------------------ +=================================== 1.1. General settings --------------------- @@ -143,7 +142,7 @@ Then: 2. Conformal Risk Control -------------------------- +========================= The goal of this method is to control any monotone and bounded loss. The result of this method can be expressed as follows: @@ -166,10 +165,8 @@ With : 3. Learn Then Test ------------------- +================== -3.1. General settings ---------------------- We are going to present the Learn Then Test framework that allows the user to control non-monotonic risk such as precision score. This method has been introduced in article [3]. The settings here are the same as RCPS and CRC, we just need to introduce some new parameters: @@ -200,8 +197,8 @@ In order to find all the parameters :math:`\lambda` that satisfy the above condi that controls the family-wise error rate (FWER), for example, Bonferonni correction. -4. 
References -------------- +References +========== [1] Lihua Lei Jitendra Malik Stephen Bates, Anastasios Angelopoulos, and Michael I. Jordan. Distribution-free, risk-controlling prediction diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst index ae4b7c346..09c55e74c 100644 --- a/doc/theoretical_description_regression.rst +++ b/doc/theoretical_description_regression.rst @@ -1,10 +1,10 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Regression : contents .. _theoretical_description_regression: -======================= +####################### Theoretical Description -======================= +####################### The :class:`mapie.regression.MapieRegressor` class uses various resampling methods based on the jackknife strategy @@ -58,7 +58,7 @@ The figure below illustrates the naive method. :align: center 2. The split method -===================== +=================== The so-called split method computes the residuals of a calibration dataset to estimate the typical error obtained on a new test data point. @@ -245,30 +245,43 @@ uncertainty is higher than :math:`CV+`, because the models' prediction spread is then higher. -9. The conformalized quantile regression (CQR) method +9. The Conformalized Quantile Regression (CQR) Method ===================================================== -The conformalized quantile method allows for better interval widths with -heteroscedastic data. It uses quantile regressors with different quantile -values to estimate the prediction bounds and the residuals of these methods are -used to create the guaranteed coverage value. +The conformalized quantile regression (CQR) method allows for better interval widths with +heteroscedastic data. It uses quantile regressors with different quantile values to estimate +the prediction bounds. The residuals of these methods are used to create the guaranteed +coverage value. + +Notations and Definitions +------------------------- +- :math:`\mathcal{I}_1` is the set of indices of the data in the training set. +- :math:`\mathcal{I}_2` is the set of indices of the data in the calibration set. +- :math:`\hat{q}_{\alpha_{\text{low}}}`: Lower quantile model trained on :math:`{(X_i, Y_i) : i \in \mathcal{I}_1}`. +- :math:`\hat{q}_{\alpha_{\text{high}}}`: Upper quantile model trained on :math:`{(X_i, Y_i) : i \in \mathcal{I}_1}`. +- :math:`E_i`: Residuals for the i-th sample in the calibration set. +- :math:`E_{\text{low}}`: Residuals from the lower quantile model. +- :math:`E_{\text{high}}`: Residuals from the upper quantile model. +- :math:`Q_{1-\alpha}(E, \mathcal{I}_2)`: The :math:`(1-\alpha)(1+1/|\mathcal{I}_2|)`-th empirical quantile of the set :math:`{E_i : i \in \mathcal{I}_2}`. + +Mathematical Formulation +------------------------ +The prediction interval :math:`\hat{C}_{n, \alpha}^{\text{CQR}}(X_{n+1})` for a new sample :math:`X_{n+1}` is given by: -.. math:: +.. 
math:: + + \hat{C}_{n, \alpha}^{\text{CQR}}(X_{n+1}) = + [\hat{q}_{\alpha_{\text{lo}}}(X_{n+1}) - Q_{1-\alpha}(E_{\text{low}}, \mathcal{I}_2), + \hat{q}_{\alpha_{\text{hi}}}(X_{n+1}) + Q_{1-\alpha}(E_{\text{high}}, \mathcal{I}_2)] - \hat{C}_{n, \alpha}^{\rm CQR}(X_{n+1}) = - [\hat{q}_{\alpha_{lo}}(X_{n+1}) - Q_{1-\alpha}(E_{low}, \mathcal{I}_2), - \hat{q}_{\alpha_{hi}}(X_{n+1}) + Q_{1-\alpha}(E_{high}, \mathcal{I}_2)] +Where: -Where :math:`Q_{1-\alpha}(E, \mathcal{I}_2) := (1-\alpha)(1+1/ |\mathcal{I}_2|)`-th -empirical quantile of :math:`{E_i : i \in \mathcal{I}_2}` and :math:`\mathcal{I}_2` is the -residuals of the estimator fitted on the calibration set. Note that in the symmetric method, -:math:`E_{low}` and :math:`E_{high}` are equal. +- :math:`\hat{q}_{\alpha_{\text{lo}}}(X_{n+1})` is the predicted lower quantile for the new sample. +- :math:`\hat{q}_{\alpha_{\text{hi}}}(X_{n+1})` is the predicted upper quantile for the new sample. -As justified by [3], this method offers a theoretical guarantee of the target coverage -level :math:`1-\alpha`. +Note: In the symmetric method, the :math:`E_{\text{low}}` and :math:`E_{\text{high}}` sets are no longer distinct. We directly consider the union set :math:`E_{\text{all}} = E_{\text{low}} \cup E_{\text{high}}`, and the empirical quantile is then calculated on all the absolute (positive) residuals. -Note that only the split method has been implemented and that it will run three separate -regressions when using :class:`mapie.quantile_regression.MapieQuantileRegressor`. +As justified by the literature, this method offers a theoretical guarantee of the target coverage level :math:`1-\alpha`. 10. The ensemble batch prediction intervals (EnbPI) method  diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py new file mode 100644 index 000000000..aab634638 --- /dev/null +++ b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py @@ -0,0 +1,114 @@ +""" +==================================== +Plotting CQR with symmetric argument +==================================== +An example plot of :class:`~mapie.quantile_regression.MapieQuantileRegressor` +illustrating the impact of the symmetry parameter. +""" +import numpy as np +from matplotlib import pyplot as plt +from sklearn.datasets import make_regression +from sklearn.ensemble import GradientBoostingRegressor + +from mapie.metrics import regression_coverage_score +from mapie.quantile_regression import MapieQuantileRegressor + +random_state = 2 + +############################################################################## +# We generate synthetic data. 
+ +X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59) + +# Define alpha level +alpha = 0.2 + +# Fit a Gradient Boosting Regressor for quantile regression +gb_reg = GradientBoostingRegressor( + loss="quantile", alpha=0.5, random_state=random_state +) + +# MAPIE Quantile Regressor +mapie_qr = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha) +mapie_qr.fit(X, y, random_state=random_state) +y_pred_sym, y_pis_sym = mapie_qr.predict(X, symmetry=True) +y_pred_asym, y_pis_asym = mapie_qr.predict(X, symmetry=False) +y_qlow = mapie_qr.estimators_[0].predict(X) +y_qup = mapie_qr.estimators_[1].predict(X) + +# Calculate coverage scores +coverage_score_sym = regression_coverage_score( + y, y_pis_sym[:, 0], y_pis_sym[:, 1] +) +coverage_score_asym = regression_coverage_score( + y, y_pis_asym[:, 0], y_pis_asym[:, 1] +) + +# Sort the values for plotting +order = np.argsort(X[:, 0]) +X_sorted = X[order] +y_pred_sym_sorted = y_pred_sym[order] +y_pis_sym_sorted = y_pis_sym[order] +y_pred_asym_sorted = y_pred_asym[order] +y_pis_asym_sorted = y_pis_asym[order] +y_qlow = y_qlow[order] +y_qup = y_qup[order] + +############################################################################## +# We will plot the predictions and prediction intervals for both symmetric +# and asymmetric intervals. The line represents the predicted values, the +# dashed lines represent the prediction intervals, and the shaded area +# represents the symmetric and asymmetric prediction intervals. + +plt.figure(figsize=(14, 7)) + +plt.subplot(1, 2, 1) +plt.xlabel("x") +plt.ylabel("y") +plt.scatter(X, y, alpha=0.3) +plt.plot(X_sorted, y_qlow, color="C1") +plt.plot(X_sorted, y_qup, color="C1") +plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--") +plt.plot(X_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--") +plt.fill_between( + X_sorted.ravel(), + y_pis_sym_sorted[:, 0].ravel(), + y_pis_sym_sorted[:, 1].ravel(), + alpha=0.2, +) +plt.title( + f"Symmetric Intervals\n" + f"Target and effective coverages for " + f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_sym:.3f})" +) + +# Plot asymmetric prediction intervals +plt.subplot(1, 2, 2) +plt.xlabel("x") +plt.ylabel("y") +plt.scatter(X, y, alpha=0.3) +plt.plot(X_sorted, y_qlow, color="C2") +plt.plot(X_sorted, y_qup, color="C2") +plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--") +plt.plot(X_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--") +plt.fill_between( + X_sorted.ravel(), + y_pis_asym_sorted[:, 0].ravel(), + y_pis_asym_sorted[:, 1].ravel(), + alpha=0.2, +) +plt.title( + f"Asymmetric Intervals\n" + f"Target and effective coverages for " + f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_asym:.3f})" +) +plt.tight_layout() +plt.show() + +############################################################################## +# The symmetric intervals (`symmetry=True`) use a combined set of residuals +# for both bounds, while the asymmetric intervals use distinct residuals for +# each bound, allowing for more flexible and accurate intervals that reflect +# the heteroscedastic nature of the data. The resulting effective coverages +# demonstrate the theoretical guarantee of the target coverage level +# :math:`1 - \alpha`. 
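+
+##############################################################################
+# As a complementary, minimal sketch (added here for illustration), we can
+# also compare the average width of the symmetric and asymmetric intervals.
+# This assumes ``regression_mean_width_score`` is available from
+# ``mapie.metrics``, alongside ``regression_coverage_score`` used above.
+
+from mapie.metrics import regression_mean_width_score
+
+# Mean interval width for each strategy (lower is tighter).
+width_sym = regression_mean_width_score(y_pis_sym[:, 0], y_pis_sym[:, 1])
+width_asym = regression_mean_width_score(y_pis_asym[:, 0], y_pis_asym[:, 1])
+print(f"Mean width of symmetric intervals: {width_sym:.3f}")
+print(f"Mean width of asymmetric intervals: {width_asym:.3f}")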
diff --git a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py index 7c63343f9..6dda6b113 100644 --- a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py +++ b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py @@ -165,7 +165,7 @@ print( "Coverage / prediction interval width mean for MapieTimeSeriesRegressor: " "\nEnbPI without any partial_fit:" - f"{coverage_npfit_enbpi :.3f}, {width_npfit_enbpi:.3f}" + f"{coverage_npfit_enbpi:.3f}, {width_npfit_enbpi:.3f}" ) print( "Coverage / prediction interval width mean for MapieTimeSeriesRegressor: " diff --git a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py index c1e184d5f..10795aea7 100644 --- a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py +++ b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py @@ -30,10 +30,9 @@ """ from __future__ import annotations -import ssl +import requests from io import BytesIO from typing import Any, Optional, Tuple -from urllib.request import urlopen from zipfile import ZipFile import matplotlib.pyplot as plt @@ -69,9 +68,8 @@ def get_X_y() -> Tuple[NDArray, NDArray]: zip_folder = "BlogFeedback.zip" csv_file = "blogData_train.csv" url = website + page + folder + zip_folder - ssl._create_default_https_context = ssl._create_unverified_context - resp = urlopen(url) - zipfile = ZipFile(BytesIO(resp.read())) + response = requests.get(url) + zipfile = ZipFile(BytesIO(response.content)) df = pd.read_csv(zipfile.open(csv_file)).to_numpy() X = df[:, :-1] y = np.log(1 + df[:, -1]) diff --git a/examples/regression/4-tutorials/plot_cqr_tutorial.py b/examples/regression/4-tutorials/plot_cqr_tutorial.py index f370fa78f..5e92e4542 100644 --- a/examples/regression/4-tutorials/plot_cqr_tutorial.py +++ b/examples/regression/4-tutorials/plot_cqr_tutorial.py @@ -121,7 +121,8 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is estimator = LGBMRegressor( objective='quantile', alpha=0.5, - random_state=random_state + random_state=random_state, + verbose=-1 ) params_distributions = dict( num_leaves=randint(low=10, high=50), @@ -135,7 +136,6 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is n_jobs=-1, n_iter=10, cv=KFold(n_splits=5, shuffle=True), - verbose=0, random_state=random_state ) optim_model.fit(X_train, y_train) diff --git a/examples/regression/4-tutorials/plot_main-tutorial-regression.py b/examples/regression/4-tutorials/plot_main-tutorial-regression.py index 50c2fd48d..51d97c8f4 100644 --- a/examples/regression/4-tutorials/plot_main-tutorial-regression.py +++ b/examples/regression/4-tutorials/plot_main-tutorial-regression.py @@ -1,32 +1,26 @@ -""" +r""" =============================== Tutorial for tabular regression =============================== + +In this tutorial, we compare the prediction intervals estimated by MAPIE on a +simple, one-dimensional, ground truth function :math:`f(x) = x \times \sin(x)`. +Throughout this tutorial, we will answer the following questions: + +- How well do the MAPIE strategies capture the aleatoric uncertainty + existing in the data? +- How do the prediction intervals estimated by the resampling strategies + evolve for new *out-of-distribution* data ? +- How do the prediction intervals vary between regressor models ? 
+Throughout this tutorial, we estimate the prediction intervals first using +a polynomial function, and then using a boosting model, and a simple neural +network. +**For practical problems, we advise using the faster CV+ or +Jackknife+-after-Bootstrap strategies. +For conservative prediction interval estimates, you can alternatively +use the CV-minmax strategies.** """ -############################################################################## -# In this tutorial, we compare the prediction intervals estimated by MAPIE on a -# simple, one-dimensional, ground truth function -# :math:`f(x) = x \times \sin(x)`. -# -# Throughout this tutorial, we will answer the following questions: -# -# - How well do the MAPIE strategies capture the aleatoric uncertainty -# existing in the data? -# -# - How do the prediction intervals estimated by the resampling strategies -# evolve for new *out-of-distribution* data ? -# -# - How do the prediction intervals vary between regressor models ? -# -# Throughout this tutorial, we estimate the prediction intervals first using -# a polynomial function, and then using a boosting model, and a simple neural -# network. -# -# **For practical problems, we advise using the faster CV+ or -# Jackknife+-after-Bootstrap strategies. -# For conservative prediction interval estimates, you can alternatively -# use the CV-minmax strategies.** import os import warnings diff --git a/examples/regression/4-tutorials/plot_ts-tutorial.py b/examples/regression/4-tutorials/plot_ts-tutorial.py index 24914c068..13dde284e 100644 --- a/examples/regression/4-tutorials/plot_ts-tutorial.py +++ b/examples/regression/4-tutorials/plot_ts-tutorial.py @@ -21,14 +21,14 @@ Once the base model is optimized, we can use :class:`~MapieTimeSeriesRegressor` to estimate the prediction intervals associated with one-step ahead forecasts through -the EnbPI method [1]. +the EnbPI method. As its parent class :class:`~MapieRegressor`, :class:`~MapieTimeSeriesRegressor` has two main arguments : "cv", and "method". In order to implement EnbPI, "method" must be set to "enbpi" (the default value) while "cv" must be set to the :class:`~mapie.subsample.BlockBootstrap` class that block bootstraps the training set. -This sampling method is used in [1] instead of the traditional bootstrap +This sampling method is used instead of the traditional bootstrap strategy as it is more suited for time series data. The EnbPI method allows you update the residuals during the prediction, @@ -38,26 +38,12 @@ class that block bootstraps the training set. the ``partial_fit`` class method called at every step. -The ACI [2] strategy allows you to adapt the conformal inference +The ACI strategy allows you to adapt the conformal inference (i.e the quantile). If the real values are not in the coverage, the size of the intervals will grow. Conversely, if the real values are in the coverage, the size of the intervals will decrease. You can use a gamma coefficient to adjust the strength of the correction. - -References ----------- -[1] Chen Xu and Yao Xie. -“Conformal Prediction Interval for Dynamic Time-Series.” -International Conference on Machine Learning (ICML, 2021). - -[2] Isaac Gibbs, Emmanuel Candes -"Adaptive conformal inference under distribution shift" -Advances in Neural Information Processing Systems, (NeurIPS, 2021). - -[3] Margaux Zaffran et al. -"Adaptive Conformal Predictions for Time Series" -https://arxiv.org/pdf/2202.07282.pdf """ import warnings @@ -180,7 +166,7 @@ class that block bootstraps the training set. 
# # We now use :class:`~MapieTimeSeriesRegressor` to build prediction intervals # associated with one-step ahead forecasts. As explained in the introduction, -# we use the EnbPI method [1] and the ACI method [2] . +# we use the EnbPI method and the ACI method. # # Estimating prediction intervals can be possible in three ways: # @@ -199,7 +185,7 @@ class that block bootstraps the training set. # sudden change points on test sets that have not been seen by the model # during training. # -# Following [1], we use the :class:`~BlockBootstrap` sampling +# We use the :class:`~BlockBootstrap` sampling # method instead of the traditional bootstrap strategy for training the model # since the former is more suited for time series data. # Here, we choose to perform 10 resamplings with 10 blocks. diff --git a/mapie/calibration.py b/mapie/calibration.py index 7a9e495ef..d15c83872 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -97,15 +97,15 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin): >>> mapie = MapieCalibrator().fit(X_toy, y_toy, random_state=20) >>> y_calib = mapie.predict_proba(X_toy) >>> print(y_calib) - [[0.84900723 nan nan] - [0.75432411 nan nan] - [0.62285341 nan nan] - [ nan 0.33333333 nan] - [ nan 0.33333333 nan] - [ nan 0.33333333 nan] - [ nan nan 0.33333002] - [ nan nan 0.54326683] - [ nan nan 0.66666124]] + [[0.84...... nan nan] + [0.75...... nan nan] + [0.62...... nan nan] + [ nan 0.33...... nan] + [ nan 0.33...... nan] + [ nan 0.33...... nan] + [ nan nan 0.33......] + [ nan nan 0.54......] + [ nan nan 0.66......]] """ fit_attributes = [ diff --git a/mapie/classification.py b/mapie/classification.py index b636bd6ab..bf13945c1 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -23,6 +23,11 @@ compute_quantiles, fit_estimator, fix_number_of_classes) +from mapie.conformity_scores.utils_classification_conformity_scores import ( + get_true_label_position, +) + + class MapieClassifier(BaseEstimator, ClassifierMixin): """ Prediction sets for classification. @@ -737,39 +742,6 @@ def _regularize_conformity_score( ) return conf_score - def _get_true_label_position( - self, - y_pred_proba: NDArray, - y: NDArray - ) -> NDArray: - """ - Return the sorted position of the true label in the - prediction - - Parameters - ---------- - y_pred_proba: NDArray of shape (n_samples, n_calsses) - Model prediction. - - y: NDArray of shape (n_samples) - Labels. - - Returns - ------- - NDArray of shape (n_samples, 1) - Position of the true label in the prediction. 
- """ - index = np.argsort( - np.fliplr(np.argsort(y_pred_proba, axis=1)) - ) - position = np.take_along_axis( - index, - y.reshape(-1, 1), - axis=1 - ) - - return position - def _get_last_included_proba( self, y_pred_proba: NDArray, @@ -1217,7 +1189,7 @@ def fit( self.y_pred_proba_raps = self.single_estimator_.predict_proba( self.X_raps ) - self.position_raps = self._get_true_label_position( + self.position_raps = get_true_label_position( self.y_pred_proba_raps, self.y_raps ) @@ -1249,7 +1221,7 @@ def fit( # Here we reorder the labels by decreasing probability # and get the position of each label from decreasing # probability - self.conformity_scores_ = self._get_true_label_position( + self.conformity_scores_ = get_true_label_position( y_pred_proba, y_enc ) diff --git a/mapie/conformity_scores/utils_classification_conformity_scores.py b/mapie/conformity_scores/utils_classification_conformity_scores.py new file mode 100644 index 000000000..8cc3bf9d4 --- /dev/null +++ b/mapie/conformity_scores/utils_classification_conformity_scores.py @@ -0,0 +1,26 @@ +import numpy as np +from mapie._typing import NDArray + + +def get_true_label_position(y_pred_proba: NDArray, y: NDArray) -> NDArray: + """ + Return the sorted position of the true label in the + prediction + + Parameters + ---------- + y_pred_proba: NDArray of shape (n_samples, n_classes) + Model prediction. + + y: NDArray of shape (n_samples) + Labels. + + Returns + ------- + NDArray of shape (n_samples, 1) + Position of the true label in the prediction. + """ + index = np.argsort(np.fliplr(np.argsort(y_pred_proba, axis=1))) + position = np.take_along_axis(index, y.reshape(-1, 1), axis=1) + + return position diff --git a/mapie/metrics.py b/mapie/metrics.py index e78f02c7c..20c5065f0 100644 --- a/mapie/metrics.py +++ b/mapie/metrics.py @@ -541,6 +541,11 @@ def regression_ssc_score( (intervals of different sizes), with constant intervals the result may be misinterpreted. + [3] Angelopoulos, A. N., & Bates, S. (2021). + A gentle introduction to conformal prediction and + distribution-free uncertainty quantification. + arXiv preprint arXiv:2107.07511. 
+ Parameters ---------- y_true: NDArray of shape (n_samples,) diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py index aeefada56..0057d7fdc 100644 --- a/mapie/tests/test_calibration.py +++ b/mapie/tests/test_calibration.py @@ -314,7 +314,9 @@ def test_correct_results(cv: str) -> None: pred_ = mapie_cal.predict_proba(X_test) top_label_ece_ = top_label_ece(y_test, pred_) np.testing.assert_array_almost_equal( - results[cv]["y_score"], pred_ # type:ignore + np.array(results[cv]["y_score"]), + np.array(pred_), + decimal=2 ) np.testing.assert_allclose( # type:ignore results[cv]["top_label_ece"], @@ -345,7 +347,9 @@ def test_correct_results_binary(cv: str) -> None: top_label_ece_ = top_label_ece(y_binary, pred_) ece = expected_calibration_error(y_binary, pred_) np.testing.assert_array_almost_equal( - results_binary[cv]["y_score"], pred_ # type:ignore + np.array(results_binary[cv]["y_score"]), + np.array(pred_), + decimal=2 ) np.testing.assert_allclose( # type:ignore results_binary[cv]["top_label_ece"], diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index d6786e3d2..fc1f3e6ba 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -1,7 +1,7 @@ from __future__ import annotations from copy import deepcopy -from typing import Any, Dict, Iterable, List, Optional, Union, cast +from typing import Any, Dict, Iterable, Optional, Union, cast import numpy as np import pandas as pd @@ -57,25 +57,6 @@ ) ] -Y_TRUE_PROBA_PLACE = [ - [ - np.array([2, 0]), - np.array([ - [.1, .3, .6], - [.2, .7, .1] - ]), - np.array([[0], [1]]) - ], - [ - np.array([1, 0]), - np.array([ - [.7, .12, .18], - [.5, .24, .26] - ]), - np.array([[2], [0]]) - ] -] - Params = TypedDict( "Params", { @@ -1854,23 +1835,6 @@ def test_get_last_included_proba_shape(k_lambda, strategy): assert y_p_p_i_l.shape == (len(X), 1, len(thresholds)) -@pytest.mark.parametrize("y_true_proba_place", Y_TRUE_PROBA_PLACE) -def test_get_true_label_position( - y_true_proba_place: List[NDArray] -) -> None: - """ - Check that the returned true label position the good. 
- """ - y_true = y_true_proba_place[0] - y_pred_proba = y_true_proba_place[1] - place = y_true_proba_place[2] - - mapie = MapieClassifier(random_state=random_state) - found_place = mapie._get_true_label_position(y_pred_proba, y_true) - - assert (found_place == place).all() - - @pytest.mark.parametrize("cv", [5, None]) def test_error_raps_cv_not_prefit(cv: Union[int, None]) -> None: """ diff --git a/mapie/tests/test_utils_classification_conformity_scores.py b/mapie/tests/test_utils_classification_conformity_scores.py new file mode 100644 index 000000000..bbb73f383 --- /dev/null +++ b/mapie/tests/test_utils_classification_conformity_scores.py @@ -0,0 +1,54 @@ +from typing import List + +import numpy as np +import pytest + +from mapie.conformity_scores.utils_classification_conformity_scores import ( + get_true_label_position, +) +from mapie._typing import NDArray + +Y_TRUE_PROBA_PLACE = [ + [ + np.array([2, 0]), + np.array([ + [.1, .3, .6], + [.2, .7, .1] + ]), + np.array([[0], [1]]) + ], + [ + np.array([1, 0]), + np.array([ + [.7, .12, .18], + [.5, .24, .26] + ]), + np.array([[2], [0]]) + ] +] + + +def test_shape_get_true_label_position() -> None: + """ + Check the shape returned by the function + """ + y_pred_proba = np.random.rand(5, 3) + y = np.random.randint(0, 3, size=(5, 1)) + position = get_true_label_position(y_pred_proba, y) + assert position.shape == y.shape + + +@pytest.mark.parametrize("y_true_proba_place", Y_TRUE_PROBA_PLACE) +def test_get_true_label_position( + y_true_proba_place: List[NDArray] +) -> None: + """ + Check that the returned true label position the good. + """ + y_true = y_true_proba_place[0] + y_pred_proba = y_true_proba_place[1] + place = y_true_proba_place[2] + + found_place = get_true_label_position(y_pred_proba, y_true) + + assert (found_place == place).all() diff --git a/setup.py b/setup.py index 6233b08d9..6fedb4cef 100644 --- a/setup.py +++ b/setup.py @@ -21,9 +21,9 @@ LICENSE = "new BSD" MAINTAINER = "T. Cordier, V. Blot, L. Lacombe" MAINTAINER_EMAIL = ( - "tcordier@quantmetry.com, " - "vblot@quantmetry.com, " - "llacombe@quantmetry.com" + "thibault.a.cordier@capgemini.com, " + "vincent.blot@capgemini.com, " + "louis.lacombe@capgemini.com" ) PYTHON_REQUIRES = ">=3.7" PACKAGES = find_packages() @@ -41,7 +41,8 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10" + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11" ] setup(