From 0d0dec916ef8c370ddc69a4d6c7c8b7cd831d2d9 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Tue, 9 Apr 2024 15:49:34 +0200 Subject: [PATCH 001/128] ENH: move get_true_label_position to utils in classification --- mapie/classification.py | 447 ++++++------------ .../utils_classification_conformity_scores.py | 26 + ..._utils_classification_conformity_scores.py | 21 + 3 files changed, 191 insertions(+), 303 deletions(-) create mode 100644 mapie/conformity_scores/utils_classification_conformity_scores.py create mode 100644 mapie/tests/test_utils_classification_conformity_scores.py diff --git a/mapie/classification.py b/mapie/classification.py index b636bd6ab..16b687bde 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -9,18 +9,29 @@ from sklearn.model_selection import BaseCrossValidator, ShuffleSplit from sklearn.preprocessing import LabelEncoder, label_binarize from sklearn.utils import _safe_indexing, check_random_state -from sklearn.utils.multiclass import (check_classification_targets, - type_of_target) -from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted, - indexable) +from sklearn.utils.multiclass import check_classification_targets, type_of_target +from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable from ._machine_precision import EPSILON from ._typing import ArrayLike, NDArray from .metrics import classification_mean_width_score -from .utils import (check_alpha, check_alpha_and_n_samples, check_cv, - check_estimator_classification, check_n_features_in, - check_n_jobs, check_null_weight, check_verbose, - compute_quantiles, fit_estimator, fix_number_of_classes) +from .utils import ( + check_alpha, + check_alpha_and_n_samples, + check_cv, + check_estimator_classification, + check_n_features_in, + check_n_jobs, + check_null_weight, + check_verbose, + compute_quantiles, + fit_estimator, + fix_number_of_classes, +) + +from mapie.conformity_scores.utils_classification_conformity_scores import ( + get_true_label_position, +) class MapieClassifier(BaseEstimator, ClassifierMixin): @@ -187,7 +198,13 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): raps_valid_cv_ = ["prefit", "split"] valid_methods_ = [ - "naive", "score", "lac", "cumulated_score", "aps", "top_k", "raps" + "naive", + "score", + "lac", + "cumulated_score", + "aps", + "top_k", + "raps", ] fit_attributes = [ "single_estimator_", @@ -196,7 +213,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): "n_features_in_", "conformity_scores_", "classes_", - "label_encoder_" + "label_encoder_", ] def __init__( @@ -207,7 +224,7 @@ def __init__( test_size: Optional[Union[int, float]] = None, n_jobs: Optional[int] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - verbose: int = 0 + verbose: int = 0, ) -> None: self.estimator = estimator self.method = method @@ -228,8 +245,7 @@ def _check_parameters(self) -> None: """ if self.method not in self.valid_methods_: raise ValueError( - "Invalid method. " - f"Allowed values are {self.valid_methods_}." + "Invalid method. " f"Allowed values are {self.valid_methods_}." ) check_n_jobs(self.n_jobs) check_verbose(self.verbose) @@ -250,18 +266,18 @@ def _check_depreciated(self) -> None: if self.method == "score": warnings.warn( "WARNING: Deprecated method. " - + "The method \"score\" is outdated. " - + "Prefer to use \"lac\" instead to keep " + + 'The method "score" is outdated. 
' + + 'Prefer to use "lac" instead to keep ' + "the same behavior in the next release.", - DeprecationWarning + DeprecationWarning, ) if self.method == "cumulated_score": warnings.warn( "WARNING: Deprecated method. " - + "The method \"cumulated_score\" is outdated. " - + "Prefer to use \"aps\" instead to keep " + + 'The method "cumulated_score" is outdated. ' + + 'Prefer to use "aps" instead to keep ' + "the same behavior in the next release.", - DeprecationWarning + DeprecationWarning, ) def _check_target(self, y: ArrayLike) -> None: @@ -281,8 +297,7 @@ def _check_target(self, y: ArrayLike) -> None: or ``"score"`` or if type of target is not multi-class. """ check_classification_targets(y) - if type_of_target(y) == "binary" and \ - self.method not in ["score", "lac"]: + if type_of_target(y) == "binary" and self.method not in ["score", "lac"]: raise ValueError( "Invalid method for binary target. " "Your target is not of type multiclass and " @@ -301,17 +316,14 @@ def _check_raps(self): If ``method`` is ``"raps"`` and ``cv`` is not ``"prefit"``. """ if (self.method == "raps") and ( - (self.cv not in self.raps_valid_cv_) - or isinstance(self.cv, ShuffleSplit) + (self.cv not in self.raps_valid_cv_) or isinstance(self.cv, ShuffleSplit) ): raise ValueError( - "RAPS method can only be used " - f"with cv in {self.raps_valid_cv_}." + "RAPS method can only be used " f"with cv in {self.raps_valid_cv_}." ) def _check_include_last_label( - self, - include_last_label: Optional[Union[bool, str]] + self, include_last_label: Optional[Union[bool, str]] ) -> Optional[Union[bool, str]]: """ Check if ``include_last_label`` is a boolean or a string. @@ -342,9 +354,8 @@ def _check_include_last_label( "Invalid include_last_label argument. " "Should be a boolean or 'randomized'." """ - if ( - (not isinstance(include_last_label, bool)) and - (not include_last_label == "randomized") + if (not isinstance(include_last_label, bool)) and ( + not include_last_label == "randomized" ): raise ValueError( "Invalid include_last_label argument. " @@ -354,9 +365,7 @@ def _check_include_last_label( return include_last_label def _check_proba_normalized( - self, - y_pred_proba: ArrayLike, - axis: int = 1 + self, y_pred_proba: ArrayLike, axis: int = 1 ) -> NDArray: """ Check if, for all the observations, the sum of @@ -384,7 +393,7 @@ def _check_proba_normalized( np.sum(y_pred_proba, axis=axis), 1, err_msg="The sum of the scores is not equal to one.", - rtol=1e-5 + rtol=1e-5, ) y_pred_proba = cast(NDArray, y_pred_proba).astype(np.float64) return y_pred_proba @@ -393,7 +402,7 @@ def _get_last_index_included( self, y_pred_proba_cumsum: NDArray, threshold: NDArray, - include_last_label: Optional[Union[bool, str]] + include_last_label: Optional[Union[bool, str]], ) -> NDArray: """ Return the index of the last included sorted probability @@ -424,27 +433,19 @@ def _get_last_index_included( NDArray of shape (n_samples, n_alpha) Index of the last included sorted probability. 
""" - if ( - (include_last_label) or - (include_last_label == 'randomized') - ): - y_pred_index_last = ( - np.ma.masked_less( - y_pred_proba_cumsum - - threshold[np.newaxis, :], - -EPSILON - ).argmin(axis=1) - ) - elif (include_last_label is False): + if (include_last_label) or (include_last_label == "randomized"): + y_pred_index_last = np.ma.masked_less( + y_pred_proba_cumsum - threshold[np.newaxis, :], -EPSILON + ).argmin(axis=1) + elif include_last_label is False: max_threshold = np.maximum( - threshold[np.newaxis, :], - np.min(y_pred_proba_cumsum, axis=1) + threshold[np.newaxis, :], np.min(y_pred_proba_cumsum, axis=1) ) y_pred_index_last = np.argmax( np.ma.masked_greater( - y_pred_proba_cumsum - max_threshold[:, np.newaxis, :], - EPSILON - ), axis=1 + y_pred_proba_cumsum - max_threshold[:, np.newaxis, :], EPSILON + ), + axis=1, ) else: raise ValueError( @@ -461,7 +462,7 @@ def _add_random_tie_breaking( y_pred_proba_last: NDArray, threshold: NDArray, lambda_star: Union[NDArray, float, None], - k_star: Union[NDArray, None] + k_star: Union[NDArray, None], ) -> NDArray: """ Randomly remove last label from prediction set based on the @@ -507,29 +508,21 @@ def _add_random_tie_breaking( """ # get cumsumed probabilities up to last retained label y_proba_last_cumsumed = np.squeeze( - np.take_along_axis( - y_pred_proba_cumsum, - y_pred_index_last, - axis=1 - ), axis=1 + np.take_along_axis(y_pred_proba_cumsum, y_pred_index_last, axis=1), axis=1 ) if self.method in ["cumulated_score", "aps"]: # compute V parameter from Romano+(2020) - vs = ( - (y_proba_last_cumsumed - threshold.reshape(1, -1)) / - y_pred_proba_last[:, 0, :] - ) + vs = (y_proba_last_cumsumed - threshold.reshape(1, -1)) / y_pred_proba_last[ + :, 0, : + ] else: # compute V parameter from Angelopoulos+(2020) L = np.sum(prediction_sets, axis=1) - vs = ( - (y_proba_last_cumsumed - threshold.reshape(1, -1)) / - ( - y_pred_proba_last[:, 0, :] - - lambda_star * np.maximum(0, L - k_star) + - lambda_star * (L > k_star) - ) + vs = (y_proba_last_cumsumed - threshold.reshape(1, -1)) / ( + y_pred_proba_last[:, 0, :] + - lambda_star * np.maximum(0, L - k_star) + + lambda_star * (L > k_star) ) # get random numbers for each observation and alpha value @@ -541,7 +534,7 @@ def _add_random_tie_breaking( prediction_sets, y_pred_index_last, vs_less_than_us[:, np.newaxis, :], - axis=1 + axis=1, ) return prediction_sets @@ -570,9 +563,7 @@ def _predict_oof_model( # we enforce y_pred_proba to contain all labels included in y if len(estimator.classes_) != self.n_classes_: y_pred_proba = fix_number_of_classes( - self.n_classes_, - estimator.classes_, - y_pred_proba + self.n_classes_, estimator.classes_, y_pred_proba ) y_pred_proba = self._check_proba_normalized(y_pred_proba) return y_pred_proba @@ -637,9 +628,7 @@ def _fit_and_predict_oof_model( y_val = _safe_indexing(y, val_index) if sample_weight is None: - estimator = fit_estimator( - estimator, X_train, y_train, **fit_params - ) + estimator = fit_estimator(estimator, X_train, y_train, **fit_params) else: sample_weight_train = _safe_indexing(sample_weight, train_index) estimator = fit_estimator( @@ -653,9 +642,7 @@ def _fit_and_predict_oof_model( return estimator, y_pred_proba, val_id, val_index def _get_true_label_cumsum_proba( - self, - y: ArrayLike, - y_pred_proba: NDArray + self, y: ArrayLike, y_pred_proba: NDArray ) -> Tuple[NDArray, NDArray]: """ Compute the cumsumed probability of the true label. @@ -674,13 +661,9 @@ def _get_true_label_cumsum_proba( is the cumsum probability of the true label. 
The second is the sorted position of the true label. """ - y_true = label_binarize( - y=y, classes=self.classes_ - ) + y_true = label_binarize(y=y, classes=self.classes_) index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) - y_pred_proba_sorted = np.take_along_axis( - y_pred_proba, index_sorted, axis=1 - ) + y_pred_proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1) y_true_sorted = np.take_along_axis(y_true, index_sorted, axis=1) y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1) cutoff = np.argmax(y_true_sorted, axis=1) @@ -695,7 +678,7 @@ def _regularize_conformity_score( k_star: NDArray, lambda_: Union[NDArray, float], conf_score: NDArray, - cutoff: NDArray + cutoff: NDArray, ) -> NDArray: """ Regularize the conformity scores with the ``"raps"`` @@ -722,61 +705,18 @@ def _regularize_conformity_score( Regularized conformity scores. The regularization depends on the value of alpha. """ - conf_score = np.repeat( - conf_score[:, :, np.newaxis], len(k_star), axis=2 - ) - cutoff = np.repeat( - cutoff[:, np.newaxis], len(k_star), axis=1 - ) - conf_score += np.maximum( - np.expand_dims( - lambda_ * (cutoff - k_star), - axis=1 - ), - 0 - ) + conf_score = np.repeat(conf_score[:, :, np.newaxis], len(k_star), axis=2) + cutoff = np.repeat(cutoff[:, np.newaxis], len(k_star), axis=1) + conf_score += np.maximum(np.expand_dims(lambda_ * (cutoff - k_star), axis=1), 0) return conf_score - def _get_true_label_position( - self, - y_pred_proba: NDArray, - y: NDArray - ) -> NDArray: - """ - Return the sorted position of the true label in the - prediction - - Parameters - ---------- - y_pred_proba: NDArray of shape (n_samples, n_calsses) - Model prediction. - - y: NDArray of shape (n_samples) - Labels. - - Returns - ------- - NDArray of shape (n_samples, 1) - Position of the true label in the prediction. - """ - index = np.argsort( - np.fliplr(np.argsort(y_pred_proba, axis=1)) - ) - position = np.take_along_axis( - index, - y.reshape(-1, 1), - axis=1 - ) - - return position - def _get_last_included_proba( self, y_pred_proba: NDArray, thresholds: NDArray, include_last_label: Union[bool, str, None], lambda_: Union[NDArray, float, None], - k_star: Union[NDArray, Any] + k_star: Union[NDArray, Any], ) -> Tuple[NDArray, NDArray, NDArray]: """ Function that returns the smallest score @@ -811,46 +751,28 @@ def _get_last_included_proba( with the RAPS method, the index of the last included score and the value of the last included score. 
""" - index_sorted = np.flip( - np.argsort(y_pred_proba, axis=1), axis=1 - ) + index_sorted = np.flip(np.argsort(y_pred_proba, axis=1), axis=1) # sort probabilities by decreasing order - y_pred_proba_sorted = np.take_along_axis( - y_pred_proba, index_sorted, axis=1 - ) + y_pred_proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1) # get sorted cumulated score - y_pred_proba_sorted_cumsum = np.cumsum( - y_pred_proba_sorted, axis=1 - ) + y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1) if self.method == "raps": y_pred_proba_sorted_cumsum += lambda_ * np.maximum( - 0, - np.cumsum( - np.ones(y_pred_proba_sorted_cumsum.shape), - axis=1 - ) - k_star + 0, np.cumsum(np.ones(y_pred_proba_sorted_cumsum.shape), axis=1) - k_star ) # get cumulated score at their original position y_pred_proba_cumsum = np.take_along_axis( - y_pred_proba_sorted_cumsum, - np.argsort(index_sorted, axis=1), - axis=1 + y_pred_proba_sorted_cumsum, np.argsort(index_sorted, axis=1), axis=1 ) # get index of the last included label y_pred_index_last = self._get_last_index_included( - y_pred_proba_cumsum, - thresholds, - include_last_label + y_pred_proba_cumsum, thresholds, include_last_label ) # get the probability of the last included label - y_pred_proba_last = np.take_along_axis( - y_pred_proba, - y_pred_index_last, - axis=1 - ) + y_pred_proba_last = np.take_along_axis(y_pred_proba, y_pred_index_last, axis=1) - zeros_scores_proba_last = (y_pred_proba_last <= EPSILON) + zeros_scores_proba_last = y_pred_proba_last <= EPSILON # If the last included proba is zero, change it to the # smallest non-zero value to avoid inluding them in the @@ -858,12 +780,10 @@ def _get_last_included_proba( if np.sum(zeros_scores_proba_last) > 0: y_pred_proba_last[zeros_scores_proba_last] = np.expand_dims( np.min( - np.ma.masked_less( - y_pred_proba, - EPSILON - ).filled(fill_value=np.inf), - axis=1 - ), axis=1 + np.ma.masked_less(y_pred_proba, EPSILON).filled(fill_value=np.inf), + axis=1, + ), + axis=1, )[zeros_scores_proba_last] return y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last @@ -874,7 +794,7 @@ def _update_size_and_lambda( alpha_np: NDArray, y_ps: NDArray, lambda_: Union[NDArray, float], - lambda_star: NDArray + lambda_star: NDArray, ) -> Tuple[NDArray, NDArray]: """Update the values of the optimal lambda if the average size of the prediction sets decreases with @@ -908,15 +828,11 @@ def _update_size_and_lambda( """ sizes = [ - classification_mean_width_score( - y_ps[:, :, i] - ) for i in range(len(alpha_np)) + classification_mean_width_score(y_ps[:, :, i]) for i in range(len(alpha_np)) ] - sizes_improve = (sizes < best_sizes - EPSILON) - lambda_star = ( - sizes_improve * lambda_ + (1 - sizes_improve) * lambda_star - ) + sizes_improve = sizes < best_sizes - EPSILON + lambda_star = sizes_improve * lambda_ + (1 - sizes_improve) * lambda_star best_sizes = sizes_improve * sizes + (1 - sizes_improve) * best_sizes return lambda_star, best_sizes @@ -926,7 +842,7 @@ def _find_lambda_star( y_pred_proba_raps: NDArray, alpha_np: NDArray, include_last_label: Union[bool, str, None], - k_star: NDArray + k_star: NDArray, ) -> Union[NDArray, float]: """Find the optimal value of lambda for each alpha. 
@@ -954,37 +870,23 @@ def _find_lambda_star( lambda_star = np.zeros(len(alpha_np)) best_sizes = np.full(len(alpha_np), np.finfo(np.float64).max) - for lambda_ in [.001, .01, .1, .2, .5]: # values given in paper[3] - true_label_cumsum_proba, cutoff = ( - self._get_true_label_cumsum_proba( - self.y_raps_no_enc, - y_pred_proba_raps[:, :, 0], - ) + for lambda_ in [0.001, 0.01, 0.1, 0.2, 0.5]: # values given in paper[3] + true_label_cumsum_proba, cutoff = self._get_true_label_cumsum_proba( + self.y_raps_no_enc, + y_pred_proba_raps[:, :, 0], ) true_label_cumsum_proba_reg = self._regularize_conformity_score( - k_star, - lambda_, - true_label_cumsum_proba, - cutoff + k_star, lambda_, true_label_cumsum_proba, cutoff ) - quantiles_ = compute_quantiles( - true_label_cumsum_proba_reg, - alpha_np - ) + quantiles_ = compute_quantiles(true_label_cumsum_proba_reg, alpha_np) _, _, y_pred_proba_last = self._get_last_included_proba( - y_pred_proba_raps, - quantiles_, - include_last_label, - lambda_, - k_star + y_pred_proba_raps, quantiles_, include_last_label, lambda_, k_star ) - y_ps = np.greater_equal( - y_pred_proba_raps - y_pred_proba_last, -EPSILON - ) + y_ps = np.greater_equal(y_pred_proba_raps - y_pred_proba_last, -EPSILON) lambda_star, best_sizes = self._update_size_and_lambda( best_sizes, alpha_np, y_ps, lambda_, lambda_star ) @@ -993,7 +895,7 @@ def _find_lambda_star( return lambda_star def _get_classes_info( - self, estimator: ClassifierMixin, y: NDArray + self, estimator: ClassifierMixin, y: NDArray ) -> Tuple[int, NDArray]: """ Compute the number of classes and the classes values @@ -1052,7 +954,7 @@ def fit( X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, - size_raps: Optional[float] = .2, + size_raps: Optional[float] = 0.2, groups: Optional[ArrayLike] = None, **fit_params, ) -> MapieClassifier: @@ -1098,9 +1000,7 @@ def fit( """ # Checks self._check_parameters() - cv = check_cv( - self.cv, test_size=self.test_size, random_state=self.random_state - ) + cv = check_cv(self.cv, test_size=self.test_size, random_state=self.random_state) X, y = indexable(X, y) y = _check_y(y) @@ -1110,19 +1010,12 @@ def fit( y = cast(NDArray, y) - estimator = check_estimator_classification( - X, - y, - cv, - self.estimator - ) + estimator = check_estimator_classification(X, y, cv, self.estimator) self.n_features_in_ = check_n_features_in(X, cv, estimator) n_samples = _num_samples(y) - self.n_classes_, self.classes_ = self._get_classes_info( - estimator, y - ) + self.n_classes_, self.classes_ = self._get_classes_info(estimator, y) enc = LabelEncoder() enc.fit(self.classes_) y_enc = enc.transform(y) @@ -1140,14 +1033,13 @@ def fit( 1, test_size=size_raps, random_state=self.random_state ) train_raps_index, val_raps_index = next(raps_split.split(X)) - X, self.X_raps, y_enc, self.y_raps = \ - _safe_indexing(X, train_raps_index), \ - _safe_indexing(X, val_raps_index), \ - _safe_indexing(y_enc, train_raps_index), \ - _safe_indexing(y_enc, val_raps_index) - self.y_raps_no_enc = self.label_encoder_.inverse_transform( - self.y_raps + X, self.X_raps, y_enc, self.y_raps = ( + _safe_indexing(X, train_raps_index), + _safe_indexing(X, val_raps_index), + _safe_indexing(y_enc, train_raps_index), + _safe_indexing(y_enc, val_raps_index), ) + self.y_raps_no_enc = self.label_encoder_.inverse_transform(self.y_raps) y = self.label_encoder_.inverse_transform(y_enc) y_enc = cast(NDArray, y_enc) n_samples = _num_samples(y_enc) @@ -1169,10 +1061,7 @@ def fit( self.single_estimator_ = fit_estimator( clone(estimator), X, 
y, sample_weight, **fit_params ) - y_pred_proba = np.empty( - (n_samples, self.n_classes_), - dtype=float - ) + y_pred_proba = np.empty((n_samples, self.n_classes_), dtype=float) outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._fit_and_predict_oof_model)( clone(estimator), @@ -1184,23 +1073,14 @@ def fit( sample_weight, **fit_params, ) - for k, (train_index, val_index) in enumerate( - cv.split(X, y_enc, groups) - ) + for k, (train_index, val_index) in enumerate(cv.split(X, y_enc, groups)) ) - ( - self.estimators_, - predictions_list, - val_ids_list, - val_indices_list - ) = map(list, zip(*outputs)) - predictions = np.concatenate( - cast(List[NDArray], predictions_list) + (self.estimators_, predictions_list, val_ids_list, val_indices_list) = map( + list, zip(*outputs) ) + predictions = np.concatenate(cast(List[NDArray], predictions_list)) val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) - val_indices = np.concatenate( - cast(List[NDArray], val_indices_list) - ) + val_indices = np.concatenate(cast(List[NDArray], val_indices_list)) self.k_[val_indices] = val_ids y_pred_proba[val_indices] = predictions @@ -1214,30 +1094,21 @@ def fit( # RAPS: compute y_pred and position on the RAPS validation dataset if self.method == "raps": - self.y_pred_proba_raps = self.single_estimator_.predict_proba( - self.X_raps - ) - self.position_raps = self._get_true_label_position( - self.y_pred_proba_raps, - self.y_raps + self.y_pred_proba_raps = self.single_estimator_.predict_proba(self.X_raps) + self.position_raps = get_true_label_position( + self.y_pred_proba_raps, self.y_raps ) # Conformity scores if self.method == "naive": - self.conformity_scores_ = np.empty( - y_pred_proba.shape, - dtype="float" - ) + self.conformity_scores_ = np.empty(y_pred_proba.shape, dtype="float") elif self.method in ["score", "lac"]: self.conformity_scores_ = np.take_along_axis( 1 - y_pred_proba, y_enc.reshape(-1, 1), axis=1 ) elif self.method in ["cumulated_score", "aps", "raps"]: - self.conformity_scores_, self.cutoff = ( - self._get_true_label_cumsum_proba( - y, - y_pred_proba - ) + self.conformity_scores_, self.cutoff = self._get_true_label_cumsum_proba( + y, y_pred_proba ) y_proba_true = np.take_along_axis( y_pred_proba, y_enc.reshape(-1, 1), axis=1 @@ -1249,14 +1120,10 @@ def fit( # Here we reorder the labels by decreasing probability # and get the position of each label from decreasing # probability - self.conformity_scores_ = self._get_true_label_position( - y_pred_proba, - y_enc - ) + self.conformity_scores_ = get_true_label_position(y_pred_proba, y_enc) else: raise ValueError( - "Invalid method. " - f"Allowed values are {self.valid_methods_}." + "Invalid method. " f"Allowed values are {self.valid_methods_}." 
) if isinstance(cv, ShuffleSplit): @@ -1269,7 +1136,7 @@ def predict( X: ArrayLike, alpha: Optional[Union[float, Iterable[float]]] = None, include_last_label: Optional[Union[bool, str]] = True, - agg_scores: Optional[str] = "mean" + agg_scores: Optional[str] = "mean", ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Prediction prediction sets on new samples based on target confidence @@ -1339,9 +1206,7 @@ def predict( if self.method == "top_k": agg_scores = "mean" # Checks - cv = check_cv( - self.cv, test_size=self.test_size, random_state=self.random_state - ) + cv = check_cv(self.cv, test_size=self.test_size, random_state=self.random_state) include_last_label = self._check_include_last_label(include_last_label) alpha = cast(Optional[NDArray], check_alpha(alpha)) check_is_fitted(self, self.fit_attributes) @@ -1366,9 +1231,7 @@ def predict( ) else: y_pred_proba_k = np.asarray( - Parallel( - n_jobs=self.n_jobs, verbose=self.verbose - )( + Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._predict_oof_model)(estimator, X) for estimator in self.estimators_ ) @@ -1394,37 +1257,24 @@ def predict( if (cv == "prefit") or (agg_scores in ["mean"]): if self.method == "raps": check_alpha_and_n_samples(alpha_np, len(self.X_raps)) - k_star = compute_quantiles( - self.position_raps, - alpha_np - ) + 1 + k_star = compute_quantiles(self.position_raps, alpha_np) + 1 y_pred_proba_raps = np.repeat( - self.y_pred_proba_raps[:, :, np.newaxis], - len(alpha_np), - axis=2 + self.y_pred_proba_raps[:, :, np.newaxis], len(alpha_np), axis=2 ) lambda_star = self._find_lambda_star( - y_pred_proba_raps, - alpha_np, - include_last_label, - k_star + y_pred_proba_raps, alpha_np, include_last_label, k_star ) self.conformity_scores_regularized = ( self._regularize_conformity_score( - k_star, - lambda_star, - self.conformity_scores_, - self.cutoff + k_star, lambda_star, self.conformity_scores_, self.cutoff ) ) self.quantiles_ = compute_quantiles( - self.conformity_scores_regularized, - alpha_np + self.conformity_scores_regularized, alpha_np ) else: self.quantiles_ = compute_quantiles( - self.conformity_scores_, - alpha_np + self.conformity_scores_, alpha_np ) else: self.quantiles_ = (n + 1) * (1 - alpha_np) @@ -1437,16 +1287,14 @@ def predict( ) else: y_pred_included = np.less_equal( - (1 - y_pred_proba) - self.conformity_scores_.ravel(), - EPSILON + (1 - y_pred_proba) - self.conformity_scores_.ravel(), EPSILON ).sum(axis=2) prediction_sets = np.stack( [ - np.greater_equal( - y_pred_included - _alpha * (n - 1), -EPSILON - ) + np.greater_equal(y_pred_included - _alpha * (n - 1), -EPSILON) for _alpha in alpha_np - ], axis=2 + ], + axis=2, ) elif self.method in ["naive", "cumulated_score", "aps", "raps"]: @@ -1484,7 +1332,7 @@ def predict( y_pred_proba_last, thresholds, lambda_star, - k_star + k_star, ) if (cv == "prefit") or (agg_scores in ["mean"]): prediction_sets = y_pred_included @@ -1494,35 +1342,28 @@ def predict( prediction_sets = np.less_equal( prediction_sets_summed[:, :, np.newaxis] - self.quantiles_[np.newaxis, np.newaxis, :], - EPSILON + EPSILON, ) elif self.method == "top_k": y_pred_proba = y_pred_proba[:, :, 0] index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) y_pred_index_last = np.stack( - [ - index_sorted[:, quantile] - for quantile in self.quantiles_ - ], axis=1 + [index_sorted[:, quantile] for quantile in self.quantiles_], axis=1 ) y_pred_proba_last = np.stack( [ np.take_along_axis( - y_pred_proba, - y_pred_index_last[:, iq].reshape(-1, 1), - axis=1 + y_pred_proba, y_pred_index_last[:, 
iq].reshape(-1, 1), axis=1
                    )
                    for iq, _ in enumerate(self.quantiles_)
                ],
                axis=2,
            )
            prediction_sets = np.greater_equal(
                y_pred_proba[:, :, np.newaxis] - y_pred_proba_last, -EPSILON
            )
        else:
            raise ValueError(
                "Invalid method. " f"Allowed values are {self.valid_methods_}."
            )
        return y_pred, prediction_sets
diff --git a/mapie/conformity_scores/utils_classification_conformity_scores.py b/mapie/conformity_scores/utils_classification_conformity_scores.py
new file mode 100644
index 000000000..8cc3bf9d4
--- /dev/null
+++ b/mapie/conformity_scores/utils_classification_conformity_scores.py
@@ -0,0 +1,26 @@
+import numpy as np
+from mapie._typing import NDArray
+
+
+def get_true_label_position(y_pred_proba: NDArray, y: NDArray) -> NDArray:
+    """
+    Return the sorted position of the true label in the
+    prediction
+
+    Parameters
+    ----------
+    y_pred_proba: NDArray of shape (n_samples, n_classes)
+        Model prediction.
+
+    y: NDArray of shape (n_samples)
+        Labels.
+
+    Returns
+    -------
+    NDArray of shape (n_samples, 1)
+        Position of the true label in the prediction.
+    """
+    index = np.argsort(np.fliplr(np.argsort(y_pred_proba, axis=1)))
+    position = np.take_along_axis(index, y.reshape(-1, 1), axis=1)
+
+    return position
diff --git a/mapie/tests/test_utils_classification_conformity_scores.py b/mapie/tests/test_utils_classification_conformity_scores.py
new file mode 100644
index 000000000..12c7e8af9
--- /dev/null
+++ b/mapie/tests/test_utils_classification_conformity_scores.py
@@ -0,0 +1,21 @@
+import numpy as np
+from mapie.conformity_scores.utils_classification_conformity_scores import (
+    get_true_label_position,
+)
+
+
+def test_get_true_label_position() -> None:
+    y_pred_proba = np.array(
+        [[0.1, 0.5, 0.4], [0.3, 0.2, 0.5], [0.2, 0.8, 0.0], [0.4, 0.35, 0.25]]
+    )
+    y = np.array([1, 2, 0, 1])
+    y = np.reshape(
+        y, (-1, 1)
+    )  # add in order to have shape of form (4,1) instead of (4,)
+
+    position = get_true_label_position(y_pred_proba, y)
+
+    expected_position = np.array([[0], [0], [1], [1]])
+
+    assert np.array_equal(position, expected_position)
+    assert position.shape == y.shape

From 019ebfeb29314e0edcd2c79ce1946424d07f9d6f Mon Sep 17 00:00:00 2001
From: Baptiste Calot
Date: Wed, 10 Apr 2024 16:57:11 +0200
Subject: [PATCH 002/128] FIX formatting applied by Black Formatter and
 correction of a unit test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mapie/classification.py            | 411 +++++++++++++++++++----------
 mapie/tests/test_classification.py |   4 ++--
 2 files changed, 273 insertions(+), 142 deletions(-)

diff --git a/mapie/classification.py b/mapie/classification.py
index 16b687bde..ea27c91e2 100644
--- a/mapie/classification.py
+++ b/mapie/classification.py
@@ -9,25 +9,19 @@
 from sklearn.model_selection import BaseCrossValidator, ShuffleSplit
 from sklearn.preprocessing import LabelEncoder, label_binarize
 from sklearn.utils import _safe_indexing, check_random_state
-from sklearn.utils.multiclass import check_classification_targets, type_of_target
+from sklearn.utils.multiclass import (check_classification_targets,
+                                      type_of_target)
-from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable
+from sklearn.utils.validation import (_check_y, _num_samples, check_is_fitted,
+                                      indexable)
 from ._machine_precision import EPSILON
 from
._typing import ArrayLike, NDArray from .metrics import classification_mean_width_score -from .utils import ( - check_alpha, - check_alpha_and_n_samples, - check_cv, - check_estimator_classification, - check_n_features_in, - check_n_jobs, - check_null_weight, - check_verbose, - compute_quantiles, - fit_estimator, - fix_number_of_classes, -) +from .utils import (check_alpha, check_alpha_and_n_samples, check_cv, + check_estimator_classification, check_n_features_in, + check_n_jobs, check_null_weight, check_verbose, + compute_quantiles, fit_estimator, fix_number_of_classes) + from mapie.conformity_scores.utils_classification_conformity_scores import ( get_true_label_position, @@ -198,13 +192,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): raps_valid_cv_ = ["prefit", "split"] valid_methods_ = [ - "naive", - "score", - "lac", - "cumulated_score", - "aps", - "top_k", - "raps", + "naive", "score", "lac", "cumulated_score", "aps", "top_k", "raps" ] fit_attributes = [ "single_estimator_", @@ -213,7 +201,7 @@ class MapieClassifier(BaseEstimator, ClassifierMixin): "n_features_in_", "conformity_scores_", "classes_", - "label_encoder_", + "label_encoder_" ] def __init__( @@ -224,7 +212,7 @@ def __init__( test_size: Optional[Union[int, float]] = None, n_jobs: Optional[int] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - verbose: int = 0, + verbose: int = 0 ) -> None: self.estimator = estimator self.method = method @@ -245,7 +233,8 @@ def _check_parameters(self) -> None: """ if self.method not in self.valid_methods_: raise ValueError( - "Invalid method. " f"Allowed values are {self.valid_methods_}." + "Invalid method. " + f"Allowed values are {self.valid_methods_}." ) check_n_jobs(self.n_jobs) check_verbose(self.verbose) @@ -266,18 +255,18 @@ def _check_depreciated(self) -> None: if self.method == "score": warnings.warn( "WARNING: Deprecated method. " - + 'The method "score" is outdated. ' - + 'Prefer to use "lac" instead to keep ' + + "The method \"score\" is outdated. " + + "Prefer to use \"lac\" instead to keep " + "the same behavior in the next release.", - DeprecationWarning, + DeprecationWarning ) if self.method == "cumulated_score": warnings.warn( "WARNING: Deprecated method. " - + 'The method "cumulated_score" is outdated. ' - + 'Prefer to use "aps" instead to keep ' + + "The method \"cumulated_score\" is outdated. " + + "Prefer to use \"aps\" instead to keep " + "the same behavior in the next release.", - DeprecationWarning, + DeprecationWarning ) def _check_target(self, y: ArrayLike) -> None: @@ -297,7 +286,8 @@ def _check_target(self, y: ArrayLike) -> None: or ``"score"`` or if type of target is not multi-class. """ check_classification_targets(y) - if type_of_target(y) == "binary" and self.method not in ["score", "lac"]: + if type_of_target(y) == "binary" and \ + self.method not in ["score", "lac"]: raise ValueError( "Invalid method for binary target. " "Your target is not of type multiclass and " @@ -316,14 +306,17 @@ def _check_raps(self): If ``method`` is ``"raps"`` and ``cv`` is not ``"prefit"``. """ if (self.method == "raps") and ( - (self.cv not in self.raps_valid_cv_) or isinstance(self.cv, ShuffleSplit) + (self.cv not in self.raps_valid_cv_) + or isinstance(self.cv, ShuffleSplit) ): raise ValueError( - "RAPS method can only be used " f"with cv in {self.raps_valid_cv_}." + "RAPS method can only be used " + f"with cv in {self.raps_valid_cv_}." 
) def _check_include_last_label( - self, include_last_label: Optional[Union[bool, str]] + self, + include_last_label: Optional[Union[bool, str]] ) -> Optional[Union[bool, str]]: """ Check if ``include_last_label`` is a boolean or a string. @@ -354,8 +347,9 @@ def _check_include_last_label( "Invalid include_last_label argument. " "Should be a boolean or 'randomized'." """ - if (not isinstance(include_last_label, bool)) and ( - not include_last_label == "randomized" + if ( + (not isinstance(include_last_label, bool)) and + (not include_last_label == "randomized") ): raise ValueError( "Invalid include_last_label argument. " @@ -365,7 +359,9 @@ def _check_include_last_label( return include_last_label def _check_proba_normalized( - self, y_pred_proba: ArrayLike, axis: int = 1 + self, + y_pred_proba: ArrayLike, + axis: int = 1 ) -> NDArray: """ Check if, for all the observations, the sum of @@ -393,7 +389,7 @@ def _check_proba_normalized( np.sum(y_pred_proba, axis=axis), 1, err_msg="The sum of the scores is not equal to one.", - rtol=1e-5, + rtol=1e-5 ) y_pred_proba = cast(NDArray, y_pred_proba).astype(np.float64) return y_pred_proba @@ -402,7 +398,7 @@ def _get_last_index_included( self, y_pred_proba_cumsum: NDArray, threshold: NDArray, - include_last_label: Optional[Union[bool, str]], + include_last_label: Optional[Union[bool, str]] ) -> NDArray: """ Return the index of the last included sorted probability @@ -433,19 +429,27 @@ def _get_last_index_included( NDArray of shape (n_samples, n_alpha) Index of the last included sorted probability. """ - if (include_last_label) or (include_last_label == "randomized"): - y_pred_index_last = np.ma.masked_less( - y_pred_proba_cumsum - threshold[np.newaxis, :], -EPSILON - ).argmin(axis=1) - elif include_last_label is False: + if ( + (include_last_label) or + (include_last_label == 'randomized') + ): + y_pred_index_last = ( + np.ma.masked_less( + y_pred_proba_cumsum + - threshold[np.newaxis, :], + -EPSILON + ).argmin(axis=1) + ) + elif (include_last_label is False): max_threshold = np.maximum( - threshold[np.newaxis, :], np.min(y_pred_proba_cumsum, axis=1) + threshold[np.newaxis, :], + np.min(y_pred_proba_cumsum, axis=1) ) y_pred_index_last = np.argmax( np.ma.masked_greater( - y_pred_proba_cumsum - max_threshold[:, np.newaxis, :], EPSILON - ), - axis=1, + y_pred_proba_cumsum - max_threshold[:, np.newaxis, :], + EPSILON + ), axis=1 ) else: raise ValueError( @@ -462,7 +466,7 @@ def _add_random_tie_breaking( y_pred_proba_last: NDArray, threshold: NDArray, lambda_star: Union[NDArray, float, None], - k_star: Union[NDArray, None], + k_star: Union[NDArray, None] ) -> NDArray: """ Randomly remove last label from prediction set based on the @@ -508,21 +512,29 @@ def _add_random_tie_breaking( """ # get cumsumed probabilities up to last retained label y_proba_last_cumsumed = np.squeeze( - np.take_along_axis(y_pred_proba_cumsum, y_pred_index_last, axis=1), axis=1 + np.take_along_axis( + y_pred_proba_cumsum, + y_pred_index_last, + axis=1 + ), axis=1 ) if self.method in ["cumulated_score", "aps"]: # compute V parameter from Romano+(2020) - vs = (y_proba_last_cumsumed - threshold.reshape(1, -1)) / y_pred_proba_last[ - :, 0, : - ] + vs = ( + (y_proba_last_cumsumed - threshold.reshape(1, -1)) / + y_pred_proba_last[:, 0, :] + ) else: # compute V parameter from Angelopoulos+(2020) L = np.sum(prediction_sets, axis=1) - vs = (y_proba_last_cumsumed - threshold.reshape(1, -1)) / ( - y_pred_proba_last[:, 0, :] - - lambda_star * np.maximum(0, L - k_star) - + lambda_star * (L > 
k_star) + vs = ( + (y_proba_last_cumsumed - threshold.reshape(1, -1)) / + ( + y_pred_proba_last[:, 0, :] - + lambda_star * np.maximum(0, L - k_star) + + lambda_star * (L > k_star) + ) ) # get random numbers for each observation and alpha value @@ -534,7 +546,7 @@ def _add_random_tie_breaking( prediction_sets, y_pred_index_last, vs_less_than_us[:, np.newaxis, :], - axis=1, + axis=1 ) return prediction_sets @@ -563,7 +575,9 @@ def _predict_oof_model( # we enforce y_pred_proba to contain all labels included in y if len(estimator.classes_) != self.n_classes_: y_pred_proba = fix_number_of_classes( - self.n_classes_, estimator.classes_, y_pred_proba + self.n_classes_, + estimator.classes_, + y_pred_proba ) y_pred_proba = self._check_proba_normalized(y_pred_proba) return y_pred_proba @@ -628,7 +642,9 @@ def _fit_and_predict_oof_model( y_val = _safe_indexing(y, val_index) if sample_weight is None: - estimator = fit_estimator(estimator, X_train, y_train, **fit_params) + estimator = fit_estimator( + estimator, X_train, y_train, **fit_params + ) else: sample_weight_train = _safe_indexing(sample_weight, train_index) estimator = fit_estimator( @@ -642,7 +658,9 @@ def _fit_and_predict_oof_model( return estimator, y_pred_proba, val_id, val_index def _get_true_label_cumsum_proba( - self, y: ArrayLike, y_pred_proba: NDArray + self, + y: ArrayLike, + y_pred_proba: NDArray ) -> Tuple[NDArray, NDArray]: """ Compute the cumsumed probability of the true label. @@ -661,9 +679,13 @@ def _get_true_label_cumsum_proba( is the cumsum probability of the true label. The second is the sorted position of the true label. """ - y_true = label_binarize(y=y, classes=self.classes_) + y_true = label_binarize( + y=y, classes=self.classes_ + ) index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) - y_pred_proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1) + y_pred_proba_sorted = np.take_along_axis( + y_pred_proba, index_sorted, axis=1 + ) y_true_sorted = np.take_along_axis(y_true, index_sorted, axis=1) y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1) cutoff = np.argmax(y_true_sorted, axis=1) @@ -678,7 +700,7 @@ def _regularize_conformity_score( k_star: NDArray, lambda_: Union[NDArray, float], conf_score: NDArray, - cutoff: NDArray, + cutoff: NDArray ) -> NDArray: """ Regularize the conformity scores with the ``"raps"`` @@ -705,9 +727,19 @@ def _regularize_conformity_score( Regularized conformity scores. The regularization depends on the value of alpha. """ - conf_score = np.repeat(conf_score[:, :, np.newaxis], len(k_star), axis=2) - cutoff = np.repeat(cutoff[:, np.newaxis], len(k_star), axis=1) - conf_score += np.maximum(np.expand_dims(lambda_ * (cutoff - k_star), axis=1), 0) + conf_score = np.repeat( + conf_score[:, :, np.newaxis], len(k_star), axis=2 + ) + cutoff = np.repeat( + cutoff[:, np.newaxis], len(k_star), axis=1 + ) + conf_score += np.maximum( + np.expand_dims( + lambda_ * (cutoff - k_star), + axis=1 + ), + 0 + ) return conf_score def _get_last_included_proba( @@ -716,7 +748,7 @@ def _get_last_included_proba( thresholds: NDArray, include_last_label: Union[bool, str, None], lambda_: Union[NDArray, float, None], - k_star: Union[NDArray, Any], + k_star: Union[NDArray, Any] ) -> Tuple[NDArray, NDArray, NDArray]: """ Function that returns the smallest score @@ -751,28 +783,46 @@ def _get_last_included_proba( with the RAPS method, the index of the last included score and the value of the last included score. 
""" - index_sorted = np.flip(np.argsort(y_pred_proba, axis=1), axis=1) + index_sorted = np.flip( + np.argsort(y_pred_proba, axis=1), axis=1 + ) # sort probabilities by decreasing order - y_pred_proba_sorted = np.take_along_axis(y_pred_proba, index_sorted, axis=1) + y_pred_proba_sorted = np.take_along_axis( + y_pred_proba, index_sorted, axis=1 + ) # get sorted cumulated score - y_pred_proba_sorted_cumsum = np.cumsum(y_pred_proba_sorted, axis=1) + y_pred_proba_sorted_cumsum = np.cumsum( + y_pred_proba_sorted, axis=1 + ) if self.method == "raps": y_pred_proba_sorted_cumsum += lambda_ * np.maximum( - 0, np.cumsum(np.ones(y_pred_proba_sorted_cumsum.shape), axis=1) - k_star + 0, + np.cumsum( + np.ones(y_pred_proba_sorted_cumsum.shape), + axis=1 + ) - k_star ) # get cumulated score at their original position y_pred_proba_cumsum = np.take_along_axis( - y_pred_proba_sorted_cumsum, np.argsort(index_sorted, axis=1), axis=1 + y_pred_proba_sorted_cumsum, + np.argsort(index_sorted, axis=1), + axis=1 ) # get index of the last included label y_pred_index_last = self._get_last_index_included( - y_pred_proba_cumsum, thresholds, include_last_label + y_pred_proba_cumsum, + thresholds, + include_last_label ) # get the probability of the last included label - y_pred_proba_last = np.take_along_axis(y_pred_proba, y_pred_index_last, axis=1) + y_pred_proba_last = np.take_along_axis( + y_pred_proba, + y_pred_index_last, + axis=1 + ) - zeros_scores_proba_last = y_pred_proba_last <= EPSILON + zeros_scores_proba_last = (y_pred_proba_last <= EPSILON) # If the last included proba is zero, change it to the # smallest non-zero value to avoid inluding them in the @@ -780,10 +830,12 @@ def _get_last_included_proba( if np.sum(zeros_scores_proba_last) > 0: y_pred_proba_last[zeros_scores_proba_last] = np.expand_dims( np.min( - np.ma.masked_less(y_pred_proba, EPSILON).filled(fill_value=np.inf), - axis=1, - ), - axis=1, + np.ma.masked_less( + y_pred_proba, + EPSILON + ).filled(fill_value=np.inf), + axis=1 + ), axis=1 )[zeros_scores_proba_last] return y_pred_proba_cumsum, y_pred_index_last, y_pred_proba_last @@ -794,7 +846,7 @@ def _update_size_and_lambda( alpha_np: NDArray, y_ps: NDArray, lambda_: Union[NDArray, float], - lambda_star: NDArray, + lambda_star: NDArray ) -> Tuple[NDArray, NDArray]: """Update the values of the optimal lambda if the average size of the prediction sets decreases with @@ -828,11 +880,15 @@ def _update_size_and_lambda( """ sizes = [ - classification_mean_width_score(y_ps[:, :, i]) for i in range(len(alpha_np)) + classification_mean_width_score( + y_ps[:, :, i] + ) for i in range(len(alpha_np)) ] - sizes_improve = sizes < best_sizes - EPSILON - lambda_star = sizes_improve * lambda_ + (1 - sizes_improve) * lambda_star + sizes_improve = (sizes < best_sizes - EPSILON) + lambda_star = ( + sizes_improve * lambda_ + (1 - sizes_improve) * lambda_star + ) best_sizes = sizes_improve * sizes + (1 - sizes_improve) * best_sizes return lambda_star, best_sizes @@ -842,7 +898,7 @@ def _find_lambda_star( y_pred_proba_raps: NDArray, alpha_np: NDArray, include_last_label: Union[bool, str, None], - k_star: NDArray, + k_star: NDArray ) -> Union[NDArray, float]: """Find the optimal value of lambda for each alpha. 
@@ -870,23 +926,37 @@ def _find_lambda_star( lambda_star = np.zeros(len(alpha_np)) best_sizes = np.full(len(alpha_np), np.finfo(np.float64).max) - for lambda_ in [0.001, 0.01, 0.1, 0.2, 0.5]: # values given in paper[3] - true_label_cumsum_proba, cutoff = self._get_true_label_cumsum_proba( - self.y_raps_no_enc, - y_pred_proba_raps[:, :, 0], + for lambda_ in [.001, .01, .1, .2, .5]: # values given in paper[3] + true_label_cumsum_proba, cutoff = ( + self._get_true_label_cumsum_proba( + self.y_raps_no_enc, + y_pred_proba_raps[:, :, 0], + ) ) true_label_cumsum_proba_reg = self._regularize_conformity_score( - k_star, lambda_, true_label_cumsum_proba, cutoff + k_star, + lambda_, + true_label_cumsum_proba, + cutoff ) - quantiles_ = compute_quantiles(true_label_cumsum_proba_reg, alpha_np) + quantiles_ = compute_quantiles( + true_label_cumsum_proba_reg, + alpha_np + ) _, _, y_pred_proba_last = self._get_last_included_proba( - y_pred_proba_raps, quantiles_, include_last_label, lambda_, k_star + y_pred_proba_raps, + quantiles_, + include_last_label, + lambda_, + k_star ) - y_ps = np.greater_equal(y_pred_proba_raps - y_pred_proba_last, -EPSILON) + y_ps = np.greater_equal( + y_pred_proba_raps - y_pred_proba_last, -EPSILON + ) lambda_star, best_sizes = self._update_size_and_lambda( best_sizes, alpha_np, y_ps, lambda_, lambda_star ) @@ -895,7 +965,7 @@ def _find_lambda_star( return lambda_star def _get_classes_info( - self, estimator: ClassifierMixin, y: NDArray + self, estimator: ClassifierMixin, y: NDArray ) -> Tuple[int, NDArray]: """ Compute the number of classes and the classes values @@ -954,7 +1024,7 @@ def fit( X: ArrayLike, y: ArrayLike, sample_weight: Optional[ArrayLike] = None, - size_raps: Optional[float] = 0.2, + size_raps: Optional[float] = .2, groups: Optional[ArrayLike] = None, **fit_params, ) -> MapieClassifier: @@ -1000,7 +1070,9 @@ def fit( """ # Checks self._check_parameters() - cv = check_cv(self.cv, test_size=self.test_size, random_state=self.random_state) + cv = check_cv( + self.cv, test_size=self.test_size, random_state=self.random_state + ) X, y = indexable(X, y) y = _check_y(y) @@ -1010,12 +1082,19 @@ def fit( y = cast(NDArray, y) - estimator = check_estimator_classification(X, y, cv, self.estimator) + estimator = check_estimator_classification( + X, + y, + cv, + self.estimator + ) self.n_features_in_ = check_n_features_in(X, cv, estimator) n_samples = _num_samples(y) - self.n_classes_, self.classes_ = self._get_classes_info(estimator, y) + self.n_classes_, self.classes_ = self._get_classes_info( + estimator, y + ) enc = LabelEncoder() enc.fit(self.classes_) y_enc = enc.transform(y) @@ -1033,13 +1112,14 @@ def fit( 1, test_size=size_raps, random_state=self.random_state ) train_raps_index, val_raps_index = next(raps_split.split(X)) - X, self.X_raps, y_enc, self.y_raps = ( - _safe_indexing(X, train_raps_index), - _safe_indexing(X, val_raps_index), - _safe_indexing(y_enc, train_raps_index), - _safe_indexing(y_enc, val_raps_index), + X, self.X_raps, y_enc, self.y_raps = \ + _safe_indexing(X, train_raps_index), \ + _safe_indexing(X, val_raps_index), \ + _safe_indexing(y_enc, train_raps_index), \ + _safe_indexing(y_enc, val_raps_index) + self.y_raps_no_enc = self.label_encoder_.inverse_transform( + self.y_raps ) - self.y_raps_no_enc = self.label_encoder_.inverse_transform(self.y_raps) y = self.label_encoder_.inverse_transform(y_enc) y_enc = cast(NDArray, y_enc) n_samples = _num_samples(y_enc) @@ -1061,7 +1141,10 @@ def fit( self.single_estimator_ = fit_estimator( clone(estimator), X, 
y, sample_weight, **fit_params ) - y_pred_proba = np.empty((n_samples, self.n_classes_), dtype=float) + y_pred_proba = np.empty( + (n_samples, self.n_classes_), + dtype=float + ) outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._fit_and_predict_oof_model)( clone(estimator), @@ -1073,14 +1156,23 @@ def fit( sample_weight, **fit_params, ) - for k, (train_index, val_index) in enumerate(cv.split(X, y_enc, groups)) + for k, (train_index, val_index) in enumerate( + cv.split(X, y_enc, groups) + ) ) - (self.estimators_, predictions_list, val_ids_list, val_indices_list) = map( - list, zip(*outputs) + ( + self.estimators_, + predictions_list, + val_ids_list, + val_indices_list + ) = map(list, zip(*outputs)) + predictions = np.concatenate( + cast(List[NDArray], predictions_list) ) - predictions = np.concatenate(cast(List[NDArray], predictions_list)) val_ids = np.concatenate(cast(List[NDArray], val_ids_list)) - val_indices = np.concatenate(cast(List[NDArray], val_indices_list)) + val_indices = np.concatenate( + cast(List[NDArray], val_indices_list) + ) self.k_[val_indices] = val_ids y_pred_proba[val_indices] = predictions @@ -1094,21 +1186,30 @@ def fit( # RAPS: compute y_pred and position on the RAPS validation dataset if self.method == "raps": - self.y_pred_proba_raps = self.single_estimator_.predict_proba(self.X_raps) + self.y_pred_proba_raps = self.single_estimator_.predict_proba( + self.X_raps + ) self.position_raps = get_true_label_position( - self.y_pred_proba_raps, self.y_raps + self.y_pred_proba_raps, + self.y_raps ) # Conformity scores if self.method == "naive": - self.conformity_scores_ = np.empty(y_pred_proba.shape, dtype="float") + self.conformity_scores_ = np.empty( + y_pred_proba.shape, + dtype="float" + ) elif self.method in ["score", "lac"]: self.conformity_scores_ = np.take_along_axis( 1 - y_pred_proba, y_enc.reshape(-1, 1), axis=1 ) elif self.method in ["cumulated_score", "aps", "raps"]: - self.conformity_scores_, self.cutoff = self._get_true_label_cumsum_proba( - y, y_pred_proba + self.conformity_scores_, self.cutoff = ( + self._get_true_label_cumsum_proba( + y, + y_pred_proba + ) ) y_proba_true = np.take_along_axis( y_pred_proba, y_enc.reshape(-1, 1), axis=1 @@ -1120,10 +1221,14 @@ def fit( # Here we reorder the labels by decreasing probability # and get the position of each label from decreasing # probability - self.conformity_scores_ = get_true_label_position(y_pred_proba, y_enc) + self.conformity_scores_ = get_true_label_position( + y_pred_proba, + y_enc + ) else: raise ValueError( - "Invalid method. " f"Allowed values are {self.valid_methods_}." + "Invalid method. " + f"Allowed values are {self.valid_methods_}." 
) if isinstance(cv, ShuffleSplit): @@ -1136,7 +1241,7 @@ def predict( X: ArrayLike, alpha: Optional[Union[float, Iterable[float]]] = None, include_last_label: Optional[Union[bool, str]] = True, - agg_scores: Optional[str] = "mean", + agg_scores: Optional[str] = "mean" ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Prediction prediction sets on new samples based on target confidence @@ -1206,7 +1311,9 @@ def predict( if self.method == "top_k": agg_scores = "mean" # Checks - cv = check_cv(self.cv, test_size=self.test_size, random_state=self.random_state) + cv = check_cv( + self.cv, test_size=self.test_size, random_state=self.random_state + ) include_last_label = self._check_include_last_label(include_last_label) alpha = cast(Optional[NDArray], check_alpha(alpha)) check_is_fitted(self, self.fit_attributes) @@ -1231,7 +1338,9 @@ def predict( ) else: y_pred_proba_k = np.asarray( - Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( + Parallel( + n_jobs=self.n_jobs, verbose=self.verbose + )( delayed(self._predict_oof_model)(estimator, X) for estimator in self.estimators_ ) @@ -1257,24 +1366,37 @@ def predict( if (cv == "prefit") or (agg_scores in ["mean"]): if self.method == "raps": check_alpha_and_n_samples(alpha_np, len(self.X_raps)) - k_star = compute_quantiles(self.position_raps, alpha_np) + 1 + k_star = compute_quantiles( + self.position_raps, + alpha_np + ) + 1 y_pred_proba_raps = np.repeat( - self.y_pred_proba_raps[:, :, np.newaxis], len(alpha_np), axis=2 + self.y_pred_proba_raps[:, :, np.newaxis], + len(alpha_np), + axis=2 ) lambda_star = self._find_lambda_star( - y_pred_proba_raps, alpha_np, include_last_label, k_star + y_pred_proba_raps, + alpha_np, + include_last_label, + k_star ) self.conformity_scores_regularized = ( self._regularize_conformity_score( - k_star, lambda_star, self.conformity_scores_, self.cutoff + k_star, + lambda_star, + self.conformity_scores_, + self.cutoff ) ) self.quantiles_ = compute_quantiles( - self.conformity_scores_regularized, alpha_np + self.conformity_scores_regularized, + alpha_np ) else: self.quantiles_ = compute_quantiles( - self.conformity_scores_, alpha_np + self.conformity_scores_, + alpha_np ) else: self.quantiles_ = (n + 1) * (1 - alpha_np) @@ -1287,14 +1409,16 @@ def predict( ) else: y_pred_included = np.less_equal( - (1 - y_pred_proba) - self.conformity_scores_.ravel(), EPSILON + (1 - y_pred_proba) - self.conformity_scores_.ravel(), + EPSILON ).sum(axis=2) prediction_sets = np.stack( [ - np.greater_equal(y_pred_included - _alpha * (n - 1), -EPSILON) + np.greater_equal( + y_pred_included - _alpha * (n - 1), -EPSILON + ) for _alpha in alpha_np - ], - axis=2, + ], axis=2 ) elif self.method in ["naive", "cumulated_score", "aps", "raps"]: @@ -1332,7 +1456,7 @@ def predict( y_pred_proba_last, thresholds, lambda_star, - k_star, + k_star ) if (cv == "prefit") or (agg_scores in ["mean"]): prediction_sets = y_pred_included @@ -1342,28 +1466,35 @@ def predict( prediction_sets = np.less_equal( prediction_sets_summed[:, :, np.newaxis] - self.quantiles_[np.newaxis, np.newaxis, :], - EPSILON, + EPSILON ) elif self.method == "top_k": y_pred_proba = y_pred_proba[:, :, 0] index_sorted = np.fliplr(np.argsort(y_pred_proba, axis=1)) y_pred_index_last = np.stack( - [index_sorted[:, quantile] for quantile in self.quantiles_], axis=1 + [ + index_sorted[:, quantile] + for quantile in self.quantiles_ + ], axis=1 ) y_pred_proba_last = np.stack( [ np.take_along_axis( - y_pred_proba, y_pred_index_last[:, iq].reshape(-1, 1), axis=1 + y_pred_proba, + y_pred_index_last[:, 
iq].reshape(-1, 1),
                        axis=1
                    )
                    for iq, _ in enumerate(self.quantiles_)
                ], axis=2
            )
            prediction_sets = np.greater_equal(
                y_pred_proba[:, :, np.newaxis]
                - y_pred_proba_last,
                -EPSILON
            )
        else:
            raise ValueError(
                "Invalid method. "
                f"Allowed values are {self.valid_methods_}."
            )
-        return y_pred, prediction_sets
+        return y_pred, prediction_sets
\ No newline at end of file
diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
index d6786e3d2..2c5426677 100644
--- a/mapie/tests/test_classification.py
+++ b/mapie/tests/test_classification.py
@@ -25,6 +25,7 @@
 from mapie.classification import MapieClassifier
 from mapie.metrics import classification_coverage_score
 from mapie.utils import check_alpha
+from mapie.conformity_scores.utils_classification_conformity_scores import get_true_label_position
 
 random_state = 42
 
@@ -1865,8 +1866,7 @@ def test_get_true_label_position(
     y_pred_proba = y_true_proba_place[1]
     place = y_true_proba_place[2]
 
-    mapie = MapieClassifier(random_state=random_state)
-    found_place = mapie._get_true_label_position(y_pred_proba, y_true)
+    found_place = get_true_label_position(y_pred_proba, y_true)
 
     assert (found_place == place).all()

From 91fec3155fc0061e421d1a32e7665cbbd9829cff Mon Sep 17 00:00:00 2001
From: Baptiste Calot
Date: Thu, 11 Apr 2024 10:51:13 +0200
Subject: [PATCH 003/128] FIX line exceeding 79 characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mapie/tests/test_classification.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
index 2c5426677..3836d0be5 100644
--- a/mapie/tests/test_classification.py
+++ b/mapie/tests/test_classification.py
@@ -25,7 +25,11 @@
 from mapie.classification import MapieClassifier
 from mapie.metrics import classification_coverage_score
 from mapie.utils import check_alpha
-from mapie.conformity_scores.utils_classification_conformity_scores import get_true_label_position
+
+from mapie.conformity_scores.utils_classification_conformity_scores import (
+
+    get_true_label_position
+    )
 
 random_state = 42

From 61756ab9370588650692d5f11eec11244ccbf3fb Mon Sep 17 00:00:00 2001
From: Baptiste Calot
Date: Thu, 11 Apr 2024 11:29:59 +0200
Subject: [PATCH 004/128] FIX add lines at the end of the file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 mapie/classification.py            | 2 +-
 mapie/tests/test_classification.py | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/mapie/classification.py b/mapie/classification.py
index ea27c91e2..bf13945c1 100644
--- a/mapie/classification.py
+++ b/mapie/classification.py
@@ -1497,4 +1497,4 @@ def predict(
                 "Invalid method. "
                 f"Allowed values are {self.valid_methods_}."
             )
-        return y_pred, prediction_sets
\ No newline at end of file
+        return y_pred, prediction_sets
diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
index 3836d0be5..55e50167c 100644
--- a/mapie/tests/test_classification.py
+++ b/mapie/tests/test_classification.py
@@ -27,9 +27,8 @@
 from mapie.utils import check_alpha
 
 from mapie.conformity_scores.utils_classification_conformity_scores import (
-
     get_true_label_position
-    )
+)
 
 random_state = 42

From 5d8b7e2b971d4be8e9e82aed535aacdf44e661f8 Mon Sep 17 00:00:00 2001
From: Baptiste Calot
Date: Thu, 11 Apr 2024 15:57:04 +0200
Subject: [PATCH 005/128] TEST take the PR review comments into account

---
 mapie/tests/test_classification.py            | 41 +------------
 ..._utils_classification_conformity_scores.py | 55 +++++++++++++++----
 2 files changed, 45 insertions(+), 51 deletions(-)

diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
index 55e50167c..fc1f3e6ba 100644
--- a/mapie/tests/test_classification.py
+++ b/mapie/tests/test_classification.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from copy import deepcopy
-from typing import Any, Dict, Iterable, List, Optional, Union, cast
+from typing import Any, Dict, Iterable, Optional, Union, cast
 
 import numpy as np
 import pandas as pd
@@ -25,10 +25,6 @@
 from mapie.classification import MapieClassifier
 from mapie.metrics import classification_coverage_score
 from mapie.utils import check_alpha
-
-from mapie.conformity_scores.utils_classification_conformity_scores import (
-    get_true_label_position
-)
 
 random_state = 42
 
 METHODS = ["lac", "aps", "raps"]
@@ -61,25 +57,6 @@
     )
 ]
 
-Y_TRUE_PROBA_PLACE = [
-    [
-        np.array([2, 0]),
-        np.array([
-            [.1, .3, .6],
-            [.2, .7, .1]
-        ]),
-        np.array([[0], [1]])
-    ],
-    [
-        np.array([1, 0]),
-        np.array([
-            [.7, .12, .18],
-            [.5, .24, .26]
-        ]),
-        np.array([[2], [0]])
-    ]
-]
-
 Params = TypedDict(
     "Params",
     {
@@ -1858,22 +1835,6 @@ def test_get_last_included_proba_shape(k_lambda, strategy):
     assert y_p_p_i_l.shape == (len(X), 1, len(thresholds))
 
 
-@pytest.mark.parametrize("y_true_proba_place", Y_TRUE_PROBA_PLACE)
-def test_get_true_label_position(
-    y_true_proba_place: List[NDArray]
-) -> None:
-    """
-    Check that the returned true label position is correct.
-    """
-    y_true = y_true_proba_place[0]
-    y_pred_proba = y_true_proba_place[1]
-    place = y_true_proba_place[2]
-
-    found_place = get_true_label_position(y_pred_proba, y_true)
-
-    assert (found_place == place).all()
-
-
 @pytest.mark.parametrize("cv", [5, None])
 def test_error_raps_cv_not_prefit(cv: Union[int, None]) -> None:
     """
diff --git a/mapie/tests/test_utils_classification_conformity_scores.py b/mapie/tests/test_utils_classification_conformity_scores.py
index 12c7e8af9..bbb73f383 100644
--- a/mapie/tests/test_utils_classification_conformity_scores.py
+++ b/mapie/tests/test_utils_classification_conformity_scores.py
@@ -1,21 +1,54 @@
+from typing import List
+
 import numpy as np
+import pytest
+
 from mapie.conformity_scores.utils_classification_conformity_scores import (
     get_true_label_position,
 )
+from mapie._typing import NDArray
 
+Y_TRUE_PROBA_PLACE = [
+    [
+        np.array([2, 0]),
+        np.array([
+            [.1, .3, .6],
+            [.2, .7, .1]
+        ]),
+        np.array([[0], [1]])
+    ],
+    [
+        np.array([1, 0]),
+        np.array([
+            [.7, .12, .18],
+            [.5, .24, .26]
+        ]),
+        np.array([[2], [0]])
+    ]
+]
 
-def test_get_true_label_position() -> None:
-    y_pred_proba = np.array(
-        [[0.1, 0.5, 0.4], [0.3, 0.2, 0.5], [0.2, 0.8, 0.0], [0.4, 0.35, 0.25]]
-    )
-    y = np.array([1, 2, 0, 1])
-    y = np.reshape(
-        y, (-1, 1)
-    )  # add in order to have shape of form (4,1) instead of (4,)
 
+def test_shape_get_true_label_position() -> None:
+    """
+    Check the shape returned by the function
+    """
+    y_pred_proba = np.random.rand(5, 3)
+    y = np.random.randint(0, 3, size=(5, 1))
     position = get_true_label_position(y_pred_proba, y)
+    assert position.shape == y.shape
 
-    expected_position = np.array([[0], [0], [1], [1]])
 
-    assert np.array_equal(position, expected_position)
-    assert position.shape == y.shape
+@pytest.mark.parametrize("y_true_proba_place", Y_TRUE_PROBA_PLACE)
+def test_get_true_label_position(
+    y_true_proba_place: List[NDArray]
+) -> None:
+    """
+    Check that the returned true label position is the correct one.
+ """ + y_true = y_true_proba_place[0] + y_pred_proba = y_true_proba_place[1] + place = y_true_proba_place[2] + + found_place = get_true_label_position(y_pred_proba, y_true) + + assert (found_place == place).all() From 856ccfb97bbfe07c51fa1bb59234c353fa2bb253 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:01:54 +0200 Subject: [PATCH 006/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index d6786e3d2..431a1264a 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -26,7 +26,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -random_state = 42 +random_state = 43 METHODS = ["lac", "aps", "raps"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] From 9319454fcefb34dadff9263084ae113c70e70c23 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:44:15 +0200 Subject: [PATCH 007/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 431a1264a..c10cbfe26 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -26,7 +26,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -random_state = 43 +random_state = 42 METHODS = ["lac", "aps", "raps"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] @@ -936,7 +936,7 @@ def get_params(self, *args: Any, **kwargs: Any): def do_nothing(*args: Any) -> None: - "Mock function that does nothing." + "Mock function that does nothing. Add this sentence in order to make a change without any impact on code" pass From f8a0305b183312ec12d28fc84e2a7723278b7439 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:54:16 +0200 Subject: [PATCH 008/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index c10cbfe26..fc5f196c1 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -936,7 +936,9 @@ def get_params(self, *args: Any, **kwargs: Any): def do_nothing(*args: Any) -> None: - "Mock function that does nothing. Add this sentence in order to make a change without any impact on code" + """Mock function that does nothing. 
+ Add this sentence in order to make + a change without any impact on code""" pass From bb09ce1c9b0f9d192b352170640244863bab9ad8 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 15 Apr 2024 09:48:52 +0200 Subject: [PATCH 009/128] Update joblib dependency to version 1.3.2 --- environment.ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.ci.yml b/environment.ci.yml index 07f31c0a3..36bd84ee6 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -10,3 +10,4 @@ dependencies: - pytest-cov - scikit-learn - typed-ast + - joblib == 1.3.2 From c2db843bf7ab71604ce33b29110edf8cd2806ba8 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 18 Apr 2024 15:32:01 +0200 Subject: [PATCH 010/128] Update joblib dependency to version 1.3.2 --- requirements.ci.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.ci.txt b/requirements.ci.txt index 587a04f87..834302510 100644 --- a/requirements.ci.txt +++ b/requirements.ci.txt @@ -5,3 +5,4 @@ pandas pytest pytest-cov typed-ast +joblib == 1.3.2 From 93733398aee658178a532c04ce4f38740772fcdc Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 18 Apr 2024 16:31:00 +0000 Subject: [PATCH 011/128] FIX: Put lower threshold for difference --- mapie/tests/test_calibration.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py index aeefada56..450cab735 100644 --- a/mapie/tests/test_calibration.py +++ b/mapie/tests/test_calibration.py @@ -344,8 +344,10 @@ def test_correct_results_binary(cv: str) -> None: pred_ = mapie_cal.predict_proba(X_binary) top_label_ece_ = top_label_ece(y_binary, pred_) ece = expected_calibration_error(y_binary, pred_) - np.testing.assert_array_almost_equal( - results_binary[cv]["y_score"], pred_ # type:ignore + np.testing.assert_array_almost_equal( # type:ignore + results_binary[cv]["y_score"], + pred_, + decimal=1e-2 ) np.testing.assert_allclose( # type:ignore results_binary[cv]["top_label_ece"], From 494ac0a8a5a017314ffce9f20444eaa15eb28f44 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 18 Apr 2024 18:41:23 +0200 Subject: [PATCH 012/128] Update decimal number for test_calibration.py --- mapie/tests/test_calibration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py index 450cab735..dc90c9831 100644 --- a/mapie/tests/test_calibration.py +++ b/mapie/tests/test_calibration.py @@ -347,7 +347,7 @@ def test_correct_results_binary(cv: str) -> None: np.testing.assert_array_almost_equal( # type:ignore results_binary[cv]["y_score"], pred_, - decimal=1e-2 + decimal=2 ) np.testing.assert_allclose( # type:ignore results_binary[cv]["top_label_ece"], From ab2039e4a82bb764255595a187a79fbb5e90576f Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 11:04:30 +0200 Subject: [PATCH 013/128] FIX: linting --- .../regression/2-advanced-analysis/plot_timeseries_enbpi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py index 7c63343f9..6dda6b113 100644 --- a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py +++ b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py @@ -165,7 +165,7 @@ print( "Coverage / prediction interval width mean for MapieTimeSeriesRegressor: " "\nEnbPI without any partial_fit:" - f"{coverage_npfit_enbpi :.3f}, {width_npfit_enbpi:.3f}" 
+ f"{coverage_npfit_enbpi:.3f}, {width_npfit_enbpi:.3f}" ) print( "Coverage / prediction interval width mean for MapieTimeSeriesRegressor: " From e0845009611b2074b36d0d0416d50a754a791d26 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 11:04:47 +0200 Subject: [PATCH 014/128] ADD: decimal precision and type fix --- mapie/tests/test_calibration.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py index dc90c9831..0057d7fdc 100644 --- a/mapie/tests/test_calibration.py +++ b/mapie/tests/test_calibration.py @@ -314,7 +314,9 @@ def test_correct_results(cv: str) -> None: pred_ = mapie_cal.predict_proba(X_test) top_label_ece_ = top_label_ece(y_test, pred_) np.testing.assert_array_almost_equal( - results[cv]["y_score"], pred_ # type:ignore + np.array(results[cv]["y_score"]), + np.array(pred_), + decimal=2 ) np.testing.assert_allclose( # type:ignore results[cv]["top_label_ece"], @@ -344,9 +346,9 @@ def test_correct_results_binary(cv: str) -> None: pred_ = mapie_cal.predict_proba(X_binary) top_label_ece_ = top_label_ece(y_binary, pred_) ece = expected_calibration_error(y_binary, pred_) - np.testing.assert_array_almost_equal( # type:ignore - results_binary[cv]["y_score"], - pred_, + np.testing.assert_array_almost_equal( + np.array(results_binary[cv]["y_score"]), + np.array(pred_), decimal=2 ) np.testing.assert_allclose( # type:ignore From 02280141be99f38e05ca9024981a638bdfa46cce Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 11:05:11 +0200 Subject: [PATCH 015/128] FIX: remove test due to changes in version 1.4.2 of _sigmoidcalibration --- mapie/calibration.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index 7a9e495ef..c19238d23 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -96,16 +96,6 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin): >>> y_toy = np.stack([0, 0, 1, 0, 1, 2, 1, 2, 2]) >>> mapie = MapieCalibrator().fit(X_toy, y_toy, random_state=20) >>> y_calib = mapie.predict_proba(X_toy) - >>> print(y_calib) - [[0.84900723 nan nan] - [0.75432411 nan nan] - [0.62285341 nan nan] - [ nan 0.33333333 nan] - [ nan 0.33333333 nan] - [ nan 0.33333333 nan] - [ nan nan 0.33333002] - [ nan nan 0.54326683] - [ nan nan 0.66666124]] """ fit_attributes = [ From 8d21b0a7504513e208a505db24c3413d01007ca7 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 13:53:12 +0200 Subject: [PATCH 016/128] Update python version to 3.10 for macOS in test.yml --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3487df6b..fdf820bc0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: python-version: "3.11" numpy-version: 1.25.2 - os: macos-latest - python-version: "3.11" + python-version: "3.10" numpy-version: 1.25.2 defaults: run: From c81974ff143b17231d060f6a7cfe634185e52b9e Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 13:55:13 +0200 Subject: [PATCH 017/128] FIX: do not change python version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fdf820bc0..b3487df6b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: python-version: "3.11" numpy-version: 1.25.2 - os: macos-latest - 
python-version: "3.10" + python-version: "3.11" numpy-version: 1.25.2 defaults: run: From 633571d1e295515cbc2ff4f42b2b7eb2cc0637c5 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 13:55:50 +0200 Subject: [PATCH 018/128] FIX: possibe fix for issue with joblib and python version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3487df6b..fdf820bc0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: python-version: "3.11" numpy-version: 1.25.2 - os: macos-latest - python-version: "3.11" + python-version: "3.10" numpy-version: 1.25.2 defaults: run: From 7f6c2186635479931cd9a3ecc1e412e343e7ebd1 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 14:13:36 +0200 Subject: [PATCH 019/128] FIX: make no changes --- environment.ci.yml | 1 - mapie/tests/test_classification.py | 7 ------- requirements.ci.txt | 1 - 3 files changed, 9 deletions(-) diff --git a/environment.ci.yml b/environment.ci.yml index 36bd84ee6..07f31c0a3 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -10,4 +10,3 @@ dependencies: - pytest-cov - scikit-learn - typed-ast - - joblib == 1.3.2 diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index fc5f196c1..52d0c379d 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -935,13 +935,6 @@ def get_params(self, *args: Any, **kwargs: Any): return {"prefit": False} -def do_nothing(*args: Any) -> None: - """Mock function that does nothing. - Add this sentence in order to make - a change without any impact on code""" - pass - - def test_mapie_classifier_sklearn_estim() -> None: """Test that MapieClassifier is an sklearn estimator""" check_estimator(MapieClassifier()) diff --git a/requirements.ci.txt b/requirements.ci.txt index 834302510..587a04f87 100644 --- a/requirements.ci.txt +++ b/requirements.ci.txt @@ -5,4 +5,3 @@ pandas pytest pytest-cov typed-ast -joblib == 1.3.2 From e3a1a938a64f802c465c33e25a6c11295922be17 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 15:07:32 +0200 Subject: [PATCH 020/128] FIX: make no changes to original file --- mapie/tests/test_classification.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 52d0c379d..d6786e3d2 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -935,6 +935,11 @@ def get_params(self, *args: Any, **kwargs: Any): return {"prefit": False} +def do_nothing(*args: Any) -> None: + "Mock function that does nothing." + pass + + def test_mapie_classifier_sklearn_estim() -> None: """Test that MapieClassifier is an sklearn estimator""" check_estimator(MapieClassifier()) From aa7d6c1df147ae447d44962d762218a981598b55 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 15:18:29 +0200 Subject: [PATCH 021/128] Update badges in README.rst --- README.rst | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index 3d6478c53..fef24768e 100644 --- a/README.rst +++ b/README.rst @@ -1,36 +1,36 @@ .. -*- mode: rst -*- -|GitHubActions|_ |Codecov|_ |ReadTheDocs|_ |License|_ |PythonVersion|_ |PyPi|_ |Conda|_ |Release|_ |Commits|_ |DOI|_ +|GitHubActions| |Codecov| |ReadTheDocs| |License| |PythonVersion| |PyPi| |Conda| |Release| |Commits| |DOI| .. 
|GitHubActions| image:: https://github.com/scikit-learn-contrib/MAPIE/actions/workflows/test.yml/badge.svg -.. _GitHubActions: https://github.com/scikit-learn-contrib/MAPIE/actions + :target: https://github.com/scikit-learn-contrib/MAPIE/actions .. |Codecov| image:: https://codecov.io/gh/scikit-learn-contrib/MAPIE/branch/master/graph/badge.svg?token=F2S6KYH4V1 -.. _Codecov: https://codecov.io/gh/scikit-learn-contrib/MAPIE + :target: https://codecov.io/gh/scikit-learn-contrib/MAPIE .. |ReadTheDocs| image:: https://readthedocs.org/projects/mapie/badge -.. _ReadTheDocs: https://mapie.readthedocs.io/en/latest + :target: https://mapie.readthedocs.io/en/latest .. |License| image:: https://img.shields.io/github/license/simai-ml/MAPIE -.. _License: https://github.com/scikit-learn-contrib/MAPIE/blob/master/LICENSE + :target: https://github.com/scikit-learn-contrib/MAPIE/blob/master/LICENSE .. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/mapie -.. _PythonVersion: https://pypi.org/project/mapie/ + :target: https://pypi.org/project/mapie/ .. |PyPi| image:: https://img.shields.io/pypi/v/mapie -.. _PyPi: https://pypi.org/project/mapie/ + :target: https://pypi.org/project/mapie/ .. |Conda| image:: https://img.shields.io/conda/vn/conda-forge/mapie -.. _Conda: https://anaconda.org/conda-forge/mapie + :target: https://anaconda.org/conda-forge/mapie .. |Release| image:: https://img.shields.io/github/v/release/scikit-learn-contrib/mapie -.. _Release: https://github.com/scikit-learn-contrib/MAPIE/releases + :target: https://github.com/scikit-learn-contrib/MAPIE/releases .. |Commits| image:: https://img.shields.io/github/commits-since/scikit-learn-contrib/mapie/latest/master -.. _Commits: https://github.com/scikit-learn-contrib/MAPIE/commits/master + :target: https://github.com/scikit-learn-contrib/MAPIE/commits/master .. |DOI| image:: https://img.shields.io/badge/10.48550/arXiv.2207.12274-B31B1B.svg -.. _DOI: https://arxiv.org/abs/2207.12274 + :target: https://arxiv.org/abs/2207.12274 .. image:: https://github.com/simai-ml/MAPIE/raw/master/doc/images/mapie_logo_nobg_cut.png :width: 400 @@ -168,27 +168,27 @@ For more information on the contribution process, please go `here Date: Fri, 19 Apr 2024 16:14:58 +0200 Subject: [PATCH 022/128] Fix illustration link on readme --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index fef24768e..5020c1631 100644 --- a/README.rst +++ b/README.rst @@ -172,23 +172,23 @@ and with the financial support from Région Ile de France and Confiance.ai. .. |Quantmetry| image:: https://www.quantmetry.com/wp-content/uploads/2020/08/08-Logo-quant-Texte-noir.svg :width: 150 - :target: https://www.quantmetry.com/ + :target: https://www.quantmetry.com/ .. |Michelin| image:: https://agngnconpm.cloudimg.io/v7/https://dgaddcosprod.blob.core.windows.net/corporate-production/attachments/cls05tqdd9e0o0tkdghwi9m7n-clooe1x0c3k3x0tlu4cxi6dpn-bibendum-salut.full.png :width: 100 - :target: https://www.michelin.com/en/ + :target: https://www.michelin.com/en/ .. |ENS| image:: https://file.diplomeo-static.com/file/00/00/01/34/13434.svg :width: 100 - :target: https://ens-paris-saclay.fr/en + :target: https://ens-paris-saclay.fr/en .. |Confiance.ai| image:: https://pbs.twimg.com/profile_images/1443838558549258264/EvWlv1Vq_400x400.jpg :width: 100 - :target: https://www.confiance.ai/ + :target: https://www.confiance.ai/ .. 
|IledeFrance| image:: https://www.iledefrance.fr/sites/default/files/logo/2024-02/logoGagnerok.svg :width: 100 - :target: https://www.iledefrance.fr/ + :target: https://www.iledefrance.fr/ 🔍 References From d2bf6f8d7e40d72b751fbf33d9f1ffa26d8b9254 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 16:17:43 +0200 Subject: [PATCH 023/128] Fix failing docs badge --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 5020c1631..ca5ef4279 100644 --- a/README.rst +++ b/README.rst @@ -8,8 +8,9 @@ .. |Codecov| image:: https://codecov.io/gh/scikit-learn-contrib/MAPIE/branch/master/graph/badge.svg?token=F2S6KYH4V1 :target: https://codecov.io/gh/scikit-learn-contrib/MAPIE -.. |ReadTheDocs| image:: https://readthedocs.org/projects/mapie/badge - :target: https://mapie.readthedocs.io/en/latest +.. |ReadTheDocs| image:: https://readthedocs.org/projects/mapie/badge/?version=stable + :target: https://mapie.readthedocs.io/en/stable/?badge=stable + :alt: Documentation Status .. |License| image:: https://img.shields.io/github/license/simai-ml/MAPIE :target: https://github.com/scikit-learn-contrib/MAPIE/blob/master/LICENSE From faf31c6ae86483515d8b59674ee4ddca2c7f7472 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 16:37:42 +0200 Subject: [PATCH 024/128] Update Python version classifiers in setup.py --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6233b08d9..51bd154b8 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,9 @@ "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10" + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12" ] setup( From 746bdc2cf14eb6f616d3d89b95cbdfb12a408e66 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 16:38:04 +0200 Subject: [PATCH 025/128] Test for workflow with python 3.12 --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3487df6b..d48c7b3ad 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -29,6 +29,9 @@ jobs: - os: ubuntu-latest python-version: "3.11" numpy-version: 1.25.2 + - os: ubuntu-latest + python-version: "3.12" + numpy-version: 1.25.2 - os: windows-latest python-version: "3.11" numpy-version: 1.25.2 From 626c3b66a4fefec166e23c197943258f9d61745c Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 25 Apr 2024 19:13:20 +0200 Subject: [PATCH 026/128] FIX: change emails --- CODE_OF_CONDUCT.md | 2 +- setup.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 59122bcbe..7563cc3c3 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -60,7 +60,7 @@ representative at an online or offline event. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at -tcordier@quantmetry.com. +thibault.a.cordier@capgemini.com. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the diff --git a/setup.py b/setup.py index 6233b08d9..0a87036a6 100644 --- a/setup.py +++ b/setup.py @@ -21,9 +21,9 @@ LICENSE = "new BSD" MAINTAINER = "T. Cordier, V. Blot, L. 
Lacombe" MAINTAINER_EMAIL = ( - "tcordier@quantmetry.com, " - "vblot@quantmetry.com, " - "llacombe@quantmetry.com" + "thibault.a.cordier@capgemini.com, " + "vincent.blot@capgemini.com, " + "louis.lacombe@capgemini.com" ) PYTHON_REQUIRES = ">=3.7" PACKAGES = find_packages() From e80151ab007cb8228b70a53d5856adb254cbc0f8 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 3 May 2024 15:43:14 +0200 Subject: [PATCH 027/128] chore: update miniconda version to "latest" --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 568bd77d6..360c7bacd 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,6 +51,7 @@ jobs: python-version: ${{ matrix.python-version }} environment-file: environment.ci.yml channels: defaults, conda-forge + miniconda-version: "latest" - name: Install numpy run: conda install numpy=${{ matrix.numpy-version }} - name: Check linting From 21da7d9daec1ce5d311942ffad4133e3522c979c Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 3 May 2024 15:48:34 +0200 Subject: [PATCH 028/128] chore: update miniconda version to "latest" --- .github/workflows/test.yml | 3 --- doc/theoretical_description_binary_classification.rst | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 360c7bacd..4298a96f1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -29,9 +29,6 @@ jobs: - os: ubuntu-latest python-version: "3.11" numpy-version: 1.25.2 - - os: ubuntu-latest - python-version: "3.12" - numpy-version: 1.25.2 - os: windows-latest python-version: "3.11" numpy-version: 1.25.2 diff --git a/doc/theoretical_description_binary_classification.rst b/doc/theoretical_description_binary_classification.rst index 3ca8b3b77..877bf83f4 100644 --- a/doc/theoretical_description_binary_classification.rst +++ b/doc/theoretical_description_binary_classification.rst @@ -12,7 +12,7 @@ calibration (see :doc:`theoretical_description_calibration`), confidence interva These 3 notions are tightly related for score-based classifier, as it is shown in [1]. Prediction sets can be computed in the same way for multiclass and binary classification with -:class:`~mapie.calibration.MapieClassifier`, and there are the same theoretical guarantees. +:class:`~mapie.classification.MapieClassifier`, and there are the same theoretical guarantees. Nevertheless, prediction sets are often much less informative in the binary case than in the multiclass case. 
From Gupta et al [1]: From 796af77dba6dc49d03b1edcb54e5f92ef79fed4b Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 3 May 2024 15:56:40 +0200 Subject: [PATCH 029/128] chore: update miniconda version to "latest" --- .github/workflows/test.yml | 2 +- environment.doc.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4298a96f1..a47f7aa68 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,7 +48,7 @@ jobs: python-version: ${{ matrix.python-version }} environment-file: environment.ci.yml channels: defaults, conda-forge - miniconda-version: "latest" + # miniconda-version: "latest" - name: Install numpy run: conda install numpy=${{ matrix.numpy-version }} - name: Check linting diff --git a/environment.doc.yml b/environment.doc.yml index f6a0e6ce9..771ef9e35 100644 --- a/environment.doc.yml +++ b/environment.doc.yml @@ -1,9 +1,9 @@ name: mapie-doc channels: - - defaults + # - defaults - conda-forge dependencies: - - lightgbm=3.2.1 + - lightgbm - numpydoc=1.1.0 - pandas=1.3.5 - python=3.10 From 44b1fec1bb3a9065dd4356ea2ae1145406776721 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 3 May 2024 16:15:31 +0200 Subject: [PATCH 030/128] chore: update miniconda version to "latest" --- environment.doc.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/environment.doc.yml b/environment.doc.yml index 771ef9e35..372523e8e 100644 --- a/environment.doc.yml +++ b/environment.doc.yml @@ -1,12 +1,11 @@ name: mapie-doc channels: - # - defaults + - defaults - conda-forge dependencies: - - lightgbm + - lightgbm==3.2.1 - numpydoc=1.1.0 - pandas=1.3.5 - - python=3.10 - scikit-learn - sphinx=4.3.2 - sphinx-gallery=0.10.1 From cdbdfbac40d790520a74d736f8a290635a29bede Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 3 May 2024 16:15:43 +0200 Subject: [PATCH 031/128] Update Python version classifiers in setup.py --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 51bd154b8..ac034f860 100644 --- a/setup.py +++ b/setup.py @@ -42,8 +42,7 @@ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12" + "Programming Language :: Python :: 3.11" ] setup( From f2db5eea12d8c290f64b33cf9d4948eb0f02f77b Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:01:54 +0200 Subject: [PATCH 032/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index d6786e3d2..431a1264a 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -26,7 +26,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -random_state = 42 +random_state = 43 METHODS = ["lac", "aps", "raps"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] From 40b41c274ad2d9618a9bfbd0169ab164a05d4c78 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:44:15 +0200 Subject: [PATCH 033/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 
431a1264a..c10cbfe26 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -26,7 +26,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -random_state = 43 +random_state = 42 METHODS = ["lac", "aps", "raps"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] @@ -936,7 +936,7 @@ def get_params(self, *args: Any, **kwargs: Any): def do_nothing(*args: Any) -> None: - "Mock function that does nothing." + "Mock function that does nothing. Add this sentence in order to make a change without any impact on code" pass From 6d71abbe84162bad728fa65eac293854daf22f36 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:54:16 +0200 Subject: [PATCH 034/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index c10cbfe26..fc5f196c1 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -936,7 +936,9 @@ def get_params(self, *args: Any, **kwargs: Any): def do_nothing(*args: Any) -> None: - "Mock function that does nothing. Add this sentence in order to make a change without any impact on code" + """Mock function that does nothing. + Add this sentence in order to make + a change without any impact on code""" pass From 1e2273e0cf73be720cb7e0149b46783c6f92f787 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 15 Apr 2024 09:48:52 +0200 Subject: [PATCH 035/128] Update joblib dependency to version 1.3.2 --- environment.ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.ci.yml b/environment.ci.yml index 07f31c0a3..36bd84ee6 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -10,3 +10,4 @@ dependencies: - pytest-cov - scikit-learn - typed-ast + - joblib == 1.3.2 From 510d23061b2b7b94d60b2620e447302e636b1b9e Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 18 Apr 2024 15:32:01 +0200 Subject: [PATCH 036/128] Update joblib dependency to version 1.3.2 --- requirements.ci.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.ci.txt b/requirements.ci.txt index 587a04f87..834302510 100644 --- a/requirements.ci.txt +++ b/requirements.ci.txt @@ -5,3 +5,4 @@ pandas pytest pytest-cov typed-ast +joblib == 1.3.2 From 0b11f68dd4cae08f36b822a4827203c09e8f899a Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 13:55:50 +0200 Subject: [PATCH 037/128] FIX: possibe fix for issue with joblib and python version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3487df6b..fdf820bc0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: python-version: "3.11" numpy-version: 1.25.2 - os: macos-latest - python-version: "3.11" + python-version: "3.10" numpy-version: 1.25.2 defaults: run: From cd566dc366cae8fee85806993dbb237d4f1df386 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 14:13:36 +0200 Subject: [PATCH 038/128] FIX: make no changes --- environment.ci.yml | 1 - mapie/tests/test_classification.py | 7 ------- requirements.ci.txt | 1 - 3 files changed, 9 deletions(-) diff --git a/environment.ci.yml b/environment.ci.yml index 36bd84ee6..07f31c0a3 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -10,4 +10,3 @@ dependencies: - pytest-cov - scikit-learn - 
typed-ast
-  - joblib == 1.3.2
diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
index fc5f196c1..52d0c379d 100644
--- a/mapie/tests/test_classification.py
+++ b/mapie/tests/test_classification.py
@@ -935,13 +935,6 @@ def get_params(self, *args: Any, **kwargs: Any):
         return {"prefit": False}
 
 
-def do_nothing(*args: Any) -> None:
-    """Mock function that does nothing.
-    Add this sentence in order to make
-    a change without any impact on code"""
-    pass
-
-
 def test_mapie_classifier_sklearn_estim() -> None:
     """Test that MapieClassifier is an sklearn estimator"""
     check_estimator(MapieClassifier())
diff --git a/requirements.ci.txt b/requirements.ci.txt
index 834302510..587a04f87 100644
--- a/requirements.ci.txt
+++ b/requirements.ci.txt
@@ -5,4 +5,3 @@ pandas
 pytest
 pytest-cov
 typed-ast
-joblib == 1.3.2

From 983d50b0ef015e0f76383c72c70a2a698dcca0e1 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Fri, 19 Apr 2024 15:07:32 +0200
Subject: [PATCH 039/128] FIX: make no changes to original file

---
 mapie/tests/test_classification.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
index 52d0c379d..d6786e3d2 100644
--- a/mapie/tests/test_classification.py
+++ b/mapie/tests/test_classification.py
@@ -935,6 +935,11 @@ def get_params(self, *args: Any, **kwargs: Any):
         return {"prefit": False}
 
 
+def do_nothing(*args: Any) -> None:
+    "Mock function that does nothing."
+    pass
+
+
 def test_mapie_classifier_sklearn_estim() -> None:
     """Test that MapieClassifier is an sklearn estimator"""
     check_estimator(MapieClassifier())

From 0727a27df8c19ce295a114655dcce2a22db28d25 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Fri, 3 May 2024 16:20:30 +0200
Subject: [PATCH 040/128] chore: update miniconda version to latest stable

---
 .github/workflows/test.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fdf820bc0..4298a96f1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -48,6 +48,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
           environment-file: environment.ci.yml
           channels: defaults, conda-forge
+          miniconda-version: "latest"
       - name: Install numpy
         run: conda install numpy=${{ matrix.numpy-version }}
       - name: Check linting

From 08ea5a9a613539819d2ac99bd0cbefe8df936f41 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Mon, 6 May 2024 13:46:33 +0200
Subject: [PATCH 041/128] chore: Update joblib dependency to version 1.3.2

---
 mapie/calibration.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/mapie/calibration.py b/mapie/calibration.py
index c19238d23..d15c83872 100644
--- a/mapie/calibration.py
+++ b/mapie/calibration.py
@@ -96,6 +96,16 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin):
     >>> y_toy = np.stack([0, 0, 1, 0, 1, 2, 1, 2, 2])
     >>> mapie = MapieCalibrator().fit(X_toy, y_toy, random_state=20)
     >>> y_calib = mapie.predict_proba(X_toy)
+    >>> print(y_calib)
+    [[0.84...... nan nan]
+     [0.75...... nan nan]
+     [0.62...... nan nan]
+     [ nan 0.33...... nan]
+     [ nan 0.33...... nan]
+     [ nan 0.33...... nan]
+     [ nan nan 0.33......]
+     [ nan nan 0.54......]
+     [ nan nan 0.66......]]
     """
 
     fit_attributes = [
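The `0.84......` entries in the restored doctest above do not pin exact digits: they rely on doctest's ellipsis matching, where each `...` in the expected output matches any run of characters. This is what keeps the example stable across the `_sigmoidcalibration` change in scikit-learn 1.4.2 noted earlier in this series. A minimal sketch of the mechanism, assuming the doctest runner enables the `ELLIPSIS` option (via a per-line directive as below, or pytest configuration)::

    >>> print(1 / 3)  # doctest: +ELLIPSIS
    0.33...

Here the full value `0.3333333333333333` is accepted because `0.33...` matches the leading digits and lets `...` absorb the rest.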
From c666194a90855d80b3ca2ae0e3af2837bef52ce4 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Mon, 6 May 2024 13:58:46 +0200
Subject: [PATCH 042/128] ADD history: conda versioning and reduce precision
 for test in MapieCalibrator

---
 HISTORY.rst | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index f837de67f..bf1572ad4 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,6 +2,12 @@
 History
 =======
 
+0.8.3 (2024-**-**)
+------------------
+
+* Fix conda versioning.
+* Reduce precision for test in `MapieCalibrator`.
+
 0.8.3 (2024-03-01)
 ------------------
 
@@ -13,7 +19,7 @@ History
 0.8.2 (2024-01-11)
 ------------------
 
-* * Resolve issue still present in 0.8.1 by updating pandas.
+* Resolve issue still present in 0.8.1 by updating pandas.
 
 0.8.1 (2024-01-11)
 ------------------

From d7200845158fd50c1fbea8112b7de7b34c315dff Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Fri, 3 May 2024 16:20:30 +0200
Subject: [PATCH 043/128] chore: update miniconda version to latest stable

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a47f7aa68..4298a96f1 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -48,7 +48,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
           environment-file: environment.ci.yml
           channels: defaults, conda-forge
-          # miniconda-version: "latest"
+          miniconda-version: "latest"
       - name: Install numpy
         run: conda install numpy=${{ matrix.numpy-version }}
       - name: Check linting

From 737bde9d8e79610c0c2b7365e03be2d99b945aa9 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Thu, 18 Apr 2024 16:31:00 +0000
Subject: [PATCH 044/128] FIX: Put lower threshold for difference

---
 mapie/tests/test_calibration.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py
index aeefada56..450cab735 100644
--- a/mapie/tests/test_calibration.py
+++ b/mapie/tests/test_calibration.py
@@ -344,8 +344,10 @@ def test_correct_results_binary(cv: str) -> None:
     pred_ = mapie_cal.predict_proba(X_binary)
     top_label_ece_ = top_label_ece(y_binary, pred_)
     ece = expected_calibration_error(y_binary, pred_)
-    np.testing.assert_array_almost_equal(
-        results_binary[cv]["y_score"], pred_  # type:ignore
+    np.testing.assert_array_almost_equal(  # type:ignore
+        results_binary[cv]["y_score"],
+        pred_,
+        decimal=1e-2
     )
     np.testing.assert_allclose(  # type:ignore
         results_binary[cv]["top_label_ece"],

From 19f5a06ed3c240526476315e0488ebe6f14847e8 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Thu, 18 Apr 2024 18:41:23 +0200
Subject: [PATCH 045/128] Update decimal number for test_calibration.py

---
 mapie/tests/test_calibration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py
index 450cab735..dc90c9831 100644
--- a/mapie/tests/test_calibration.py
+++ b/mapie/tests/test_calibration.py
@@ -347,7 +347,7 @@ def test_correct_results_binary(cv: str) -> None:
     np.testing.assert_array_almost_equal(  # type:ignore
         results_binary[cv]["y_score"],
         pred_,
-        decimal=1e-2
+        decimal=2
     )
     np.testing.assert_allclose(  # type:ignore
         results_binary[cv]["top_label_ece"],

From 34ddeda107beb9517269ebab28c07f45f263f2b2 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Fri, 19 Apr 2024 11:04:30 +0200
Subject: [PATCH 046/128] FIX: linting

---
.../regression/2-advanced-analysis/plot_timeseries_enbpi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py index 7c63343f9..6dda6b113 100644 --- a/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py +++ b/examples/regression/2-advanced-analysis/plot_timeseries_enbpi.py @@ -165,7 +165,7 @@ print( "Coverage / prediction interval width mean for MapieTimeSeriesRegressor: " "\nEnbPI without any partial_fit:" - f"{coverage_npfit_enbpi :.3f}, {width_npfit_enbpi:.3f}" + f"{coverage_npfit_enbpi:.3f}, {width_npfit_enbpi:.3f}" ) print( "Coverage / prediction interval width mean for MapieTimeSeriesRegressor: " From 9e264253457fc112c26bf87720fbdd93147bad08 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 11:04:47 +0200 Subject: [PATCH 047/128] ADD: decimal precision and type fix --- mapie/tests/test_calibration.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mapie/tests/test_calibration.py b/mapie/tests/test_calibration.py index dc90c9831..0057d7fdc 100644 --- a/mapie/tests/test_calibration.py +++ b/mapie/tests/test_calibration.py @@ -314,7 +314,9 @@ def test_correct_results(cv: str) -> None: pred_ = mapie_cal.predict_proba(X_test) top_label_ece_ = top_label_ece(y_test, pred_) np.testing.assert_array_almost_equal( - results[cv]["y_score"], pred_ # type:ignore + np.array(results[cv]["y_score"]), + np.array(pred_), + decimal=2 ) np.testing.assert_allclose( # type:ignore results[cv]["top_label_ece"], @@ -344,9 +346,9 @@ def test_correct_results_binary(cv: str) -> None: pred_ = mapie_cal.predict_proba(X_binary) top_label_ece_ = top_label_ece(y_binary, pred_) ece = expected_calibration_error(y_binary, pred_) - np.testing.assert_array_almost_equal( # type:ignore - results_binary[cv]["y_score"], - pred_, + np.testing.assert_array_almost_equal( + np.array(results_binary[cv]["y_score"]), + np.array(pred_), decimal=2 ) np.testing.assert_allclose( # type:ignore From 0e05d6e8c24098fde2d45efe4d40b699f08a8efe Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 11:05:11 +0200 Subject: [PATCH 048/128] FIX: remove test due to changes in version 1.4.2 of _sigmoidcalibration --- mapie/calibration.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/mapie/calibration.py b/mapie/calibration.py index 7a9e495ef..c19238d23 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -96,16 +96,6 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin): >>> y_toy = np.stack([0, 0, 1, 0, 1, 2, 1, 2, 2]) >>> mapie = MapieCalibrator().fit(X_toy, y_toy, random_state=20) >>> y_calib = mapie.predict_proba(X_toy) - >>> print(y_calib) - [[0.84900723 nan nan] - [0.75432411 nan nan] - [0.62285341 nan nan] - [ nan 0.33333333 nan] - [ nan 0.33333333 nan] - [ nan 0.33333333 nan] - [ nan nan 0.33333002] - [ nan nan 0.54326683] - [ nan nan 0.66666124]] """ fit_attributes = [ From 599d483ae49982cf11989c68a72f87c47ddac54c Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 13:55:13 +0200 Subject: [PATCH 049/128] FIX: do not change python version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4298a96f1..109459c66 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: python-version: "3.11" numpy-version: 1.25.2 - os: macos-latest - 
python-version: "3.10" + python-version: "3.11" numpy-version: 1.25.2 defaults: run: From 23ba5bc78500ccbdeec7bb64ac716dfae2b9a740 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:01:54 +0200 Subject: [PATCH 050/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index d6786e3d2..431a1264a 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -26,7 +26,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -random_state = 42 +random_state = 43 METHODS = ["lac", "aps", "raps"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] From 3fa5dfbd0d4fdfe6f54c48be260d9612e1eeee9b Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:44:15 +0200 Subject: [PATCH 051/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 431a1264a..c10cbfe26 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -26,7 +26,7 @@ from mapie.metrics import classification_coverage_score from mapie.utils import check_alpha -random_state = 43 +random_state = 42 METHODS = ["lac", "aps", "raps"] WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)] @@ -936,7 +936,7 @@ def get_params(self, *args: Any, **kwargs: Any): def do_nothing(*args: Any) -> None: - "Mock function that does nothing." + "Mock function that does nothing. Add this sentence in order to make a change without any impact on code" pass From 239faa3fcb0cfe9dda8a42cfa0e10582542b0621 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 12 Apr 2024 14:54:16 +0200 Subject: [PATCH 052/128] TRY Code changes without any impact to make a PR --- mapie/tests/test_classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index c10cbfe26..fc5f196c1 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -936,7 +936,9 @@ def get_params(self, *args: Any, **kwargs: Any): def do_nothing(*args: Any) -> None: - "Mock function that does nothing. Add this sentence in order to make a change without any impact on code" + """Mock function that does nothing. 
+ Add this sentence in order to make + a change without any impact on code""" pass From bf42a5b3aba5a4c8b82762664a93c24c538bb300 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 15 Apr 2024 09:48:52 +0200 Subject: [PATCH 053/128] Update joblib dependency to version 1.3.2 --- environment.ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.ci.yml b/environment.ci.yml index 07f31c0a3..36bd84ee6 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -10,3 +10,4 @@ dependencies: - pytest-cov - scikit-learn - typed-ast + - joblib == 1.3.2 From 044a722a8fd60637326c00a35e1f93611e993169 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 18 Apr 2024 15:32:01 +0200 Subject: [PATCH 054/128] Update joblib dependency to version 1.3.2 --- requirements.ci.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.ci.txt b/requirements.ci.txt index 587a04f87..834302510 100644 --- a/requirements.ci.txt +++ b/requirements.ci.txt @@ -5,3 +5,4 @@ pandas pytest pytest-cov typed-ast +joblib == 1.3.2 From 296bf902d2de251b85dbf4dcfb6b2cf4f4437673 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 13:55:50 +0200 Subject: [PATCH 055/128] FIX: possibe fix for issue with joblib and python version --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 109459c66..4298a96f1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: python-version: "3.11" numpy-version: 1.25.2 - os: macos-latest - python-version: "3.11" + python-version: "3.10" numpy-version: 1.25.2 defaults: run: From 0410cdd34765862ecf1a6ee5b959471613e280ac Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 14:13:36 +0200 Subject: [PATCH 056/128] FIX: make no changes --- environment.ci.yml | 1 - mapie/tests/test_classification.py | 7 ------- requirements.ci.txt | 1 - 3 files changed, 9 deletions(-) diff --git a/environment.ci.yml b/environment.ci.yml index 36bd84ee6..07f31c0a3 100644 --- a/environment.ci.yml +++ b/environment.ci.yml @@ -10,4 +10,3 @@ dependencies: - pytest-cov - scikit-learn - typed-ast - - joblib == 1.3.2 diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index fc5f196c1..52d0c379d 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -935,13 +935,6 @@ def get_params(self, *args: Any, **kwargs: Any): return {"prefit": False} -def do_nothing(*args: Any) -> None: - """Mock function that does nothing. 
- Add this sentence in order to make - a change without any impact on code""" - pass - - def test_mapie_classifier_sklearn_estim() -> None: """Test that MapieClassifier is an sklearn estimator""" check_estimator(MapieClassifier()) diff --git a/requirements.ci.txt b/requirements.ci.txt index 834302510..587a04f87 100644 --- a/requirements.ci.txt +++ b/requirements.ci.txt @@ -5,4 +5,3 @@ pandas pytest pytest-cov typed-ast -joblib == 1.3.2 From 983d50b0ef015e0f76383c72c70a2a698dcca0e1 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Fri, 19 Apr 2024 15:07:32 +0200 Subject: [PATCH 057/128] FIX: make no changes to original file --- mapie/tests/test_classification.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 52d0c379d..d6786e3d2 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -935,6 +935,11 @@ def get_params(self, *args: Any, **kwargs: Any): return {"prefit": False} +def do_nothing(*args: Any) -> None: + "Mock function that does nothing." + pass + + def test_mapie_classifier_sklearn_estim() -> None: """Test that MapieClassifier is an sklearn estimator""" check_estimator(MapieClassifier()) From 8e9a91014af71e909730fbe1fce84263a69751c0 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 6 May 2024 13:46:33 +0200 Subject: [PATCH 058/128] chore: Update joblib dependency to version 1.3.2 --- mapie/calibration.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/mapie/calibration.py b/mapie/calibration.py index c19238d23..d15c83872 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -96,6 +96,16 @@ class MapieCalibrator(BaseEstimator, ClassifierMixin): >>> y_toy = np.stack([0, 0, 1, 0, 1, 2, 1, 2, 2]) >>> mapie = MapieCalibrator().fit(X_toy, y_toy, random_state=20) >>> y_calib = mapie.predict_proba(X_toy) + >>> print(y_calib) + [[0.84...... nan nan] + [0.75...... nan nan] + [0.62...... nan nan] + [ nan 0.33...... nan] + [ nan 0.33...... nan] + [ nan 0.33...... nan] + [ nan nan 0.33......] + [ nan nan 0.54......] + [ nan nan 0.66......]] """ fit_attributes = [ From 7562fc1abe293c4e2879d020b8b46bcc34293d03 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 6 May 2024 13:58:46 +0200 Subject: [PATCH 059/128] ADD history: conda versioning and reduce precision for test in MapieCalibrator --- HISTORY.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index f837de67f..bf1572ad4 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,12 @@ History ======= +0.8.3 (2024-**-**) +------------------ + +* Fix conda versionning. +* Reduce precision for test in `MapieCalibrator`. + 0.8.3 (2024-03-01) ------------------ @@ -13,7 +19,7 @@ History 0.8.2 (2024-01-11) ------------------ -* * Resolve issue still present in 0.8.1 by updating pandas. +* Resolve issue still present in 0.8.1 by updating pandas. 
0.8.1 (2024-01-11) ------------------ From 2e52c8b4c614ed5e7c33e2f26751e36f757b44ee Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 13 May 2024 16:23:01 +0200 Subject: [PATCH 060/128] Remove changes --- environment.doc.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environment.doc.yml b/environment.doc.yml index 372523e8e..f6a0e6ce9 100644 --- a/environment.doc.yml +++ b/environment.doc.yml @@ -3,9 +3,10 @@ channels: - defaults - conda-forge dependencies: - - lightgbm==3.2.1 + - lightgbm=3.2.1 - numpydoc=1.1.0 - pandas=1.3.5 + - python=3.10 - scikit-learn - sphinx=4.3.2 - sphinx-gallery=0.10.1 From 7cd3c50305739f1290d4256ed30befe1a678b3a2 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 13 May 2024 16:23:12 +0200 Subject: [PATCH 061/128] chore: Update image sizes in README.rst --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index ca5ef4279..b29b295e5 100644 --- a/README.rst +++ b/README.rst @@ -172,23 +172,23 @@ and with the financial support from Région Ile de France and Confiance.ai. |Quantmetry| |Michelin| |ENS| |Confiance.ai| |IledeFrance| .. |Quantmetry| image:: https://www.quantmetry.com/wp-content/uploads/2020/08/08-Logo-quant-Texte-noir.svg - :width: 150 + :height: 100 :target: https://www.quantmetry.com/ .. |Michelin| image:: https://agngnconpm.cloudimg.io/v7/https://dgaddcosprod.blob.core.windows.net/corporate-production/attachments/cls05tqdd9e0o0tkdghwi9m7n-clooe1x0c3k3x0tlu4cxi6dpn-bibendum-salut.full.png - :width: 100 + :height: 100 :target: https://www.michelin.com/en/ .. |ENS| image:: https://file.diplomeo-static.com/file/00/00/01/34/13434.svg - :width: 100 + :height: 100 :target: https://ens-paris-saclay.fr/en .. |Confiance.ai| image:: https://pbs.twimg.com/profile_images/1443838558549258264/EvWlv1Vq_400x400.jpg - :width: 100 + :height: 100 :target: https://www.confiance.ai/ .. |IledeFrance| image:: https://www.iledefrance.fr/sites/default/files/logo/2024-02/logoGagnerok.svg - :width: 100 + :height: 100 :target: https://www.iledefrance.fr/ From 9653349c36e84ef7ac9decb7440a996203ccccdb Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 13 May 2024 16:23:55 +0200 Subject: [PATCH 062/128] chore: Update image sizes in README.rst --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index b29b295e5..65d500025 100644 --- a/README.rst +++ b/README.rst @@ -172,23 +172,23 @@ and with the financial support from Région Ile de France and Confiance.ai. |Quantmetry| |Michelin| |ENS| |Confiance.ai| |IledeFrance| .. |Quantmetry| image:: https://www.quantmetry.com/wp-content/uploads/2020/08/08-Logo-quant-Texte-noir.svg - :height: 100 + :height: 25 :target: https://www.quantmetry.com/ .. |Michelin| image:: https://agngnconpm.cloudimg.io/v7/https://dgaddcosprod.blob.core.windows.net/corporate-production/attachments/cls05tqdd9e0o0tkdghwi9m7n-clooe1x0c3k3x0tlu4cxi6dpn-bibendum-salut.full.png - :height: 100 + :height: 25 :target: https://www.michelin.com/en/ .. |ENS| image:: https://file.diplomeo-static.com/file/00/00/01/34/13434.svg - :height: 100 + :height: 25 :target: https://ens-paris-saclay.fr/en .. |Confiance.ai| image:: https://pbs.twimg.com/profile_images/1443838558549258264/EvWlv1Vq_400x400.jpg - :height: 100 + :height: 25 :target: https://www.confiance.ai/ .. 
|IledeFrance| image:: https://www.iledefrance.fr/sites/default/files/logo/2024-02/logoGagnerok.svg - :height: 100 + :height: 25 :target: https://www.iledefrance.fr/ From 379d01acd95a966448fe4b7359155e66f80295bb Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Mon, 13 May 2024 16:25:23 +0200 Subject: [PATCH 063/128] chore: Update image sizes in README.rst --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 65d500025..a6b57cbfd 100644 --- a/README.rst +++ b/README.rst @@ -172,23 +172,23 @@ and with the financial support from Région Ile de France and Confiance.ai. |Quantmetry| |Michelin| |ENS| |Confiance.ai| |IledeFrance| .. |Quantmetry| image:: https://www.quantmetry.com/wp-content/uploads/2020/08/08-Logo-quant-Texte-noir.svg - :height: 25 + :height: 35 :target: https://www.quantmetry.com/ .. |Michelin| image:: https://agngnconpm.cloudimg.io/v7/https://dgaddcosprod.blob.core.windows.net/corporate-production/attachments/cls05tqdd9e0o0tkdghwi9m7n-clooe1x0c3k3x0tlu4cxi6dpn-bibendum-salut.full.png - :height: 25 + :height: 35 :target: https://www.michelin.com/en/ .. |ENS| image:: https://file.diplomeo-static.com/file/00/00/01/34/13434.svg - :height: 25 + :height: 35 :target: https://ens-paris-saclay.fr/en .. |Confiance.ai| image:: https://pbs.twimg.com/profile_images/1443838558549258264/EvWlv1Vq_400x400.jpg - :height: 25 + :height: 35 :target: https://www.confiance.ai/ .. |IledeFrance| image:: https://www.iledefrance.fr/sites/default/files/logo/2024-02/logoGagnerok.svg - :height: 25 + :height: 35 :target: https://www.iledefrance.fr/ From b9efa0852ee890828c93ae65364e7beef3d964cd Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 14 May 2024 17:34:01 +0200 Subject: [PATCH 064/128] chore: Update version number in configuration files --- .bumpversion.cfg | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 7f85c0cc2..50f194e51 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -14,3 +14,8 @@ replace = __version__ = "{new_version}" [bumpversion:file:doc/conf.py] search = version = "{current_version}" replace = version = "{new_version}" + +[bumpversion:file:CITATION.cff] +search = version = "{current_version}" +replace = version = "{new_version}" + From ff6ff003215a2eb33b17650b5e59c1234f17f4e8 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 14 May 2024 17:34:08 +0200 Subject: [PATCH 065/128] chore: Add CITATION.cff file with software citation information --- CITATION.cff | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..aead3751d --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,27 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." 
+authors:
+- family-names: "Cordier"
+  given-names: "Thibault"
+  orcid: "https://orcid.org/0000-0000-0000-0000"
+title: "MAPIE - Model Agnostic Prediction Interval Estimator"
+version: 0.8.3
+date-released: 2019-04-30
+url: "https://github.com/scikit-learn-contrib/MAPIE"
+preferred-citation:
+  type: article
+  authors:
+  - family-names: "Taquet"
+    given-names: "Vianney"
+  - family-names: "Blot"
+    given-names: "Vincent"
+  - family-names: "Morzadec"
+    given-names: "Thomas"
+  - family-names: "Lacombe"
+    given-names: "Louis"
+  - family-names: "Brunel"
+    given-names: "Nicolas"
+  doi: "10.48550/arXiv.2207.12274"
+  journal: "arXiv preprint arXiv:2207.12274"
+  title: "MAPIE: an open-source library for distribution-free uncertainty quantification"
+  year: 2021
\ No newline at end of file

From aad1b95b8166d79b8580ce015b271329ecde19e3 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Tue, 14 May 2024 19:12:06 +0200
Subject: [PATCH 066/128] feat: Improve documentation for Conformalized
 Quantile Regression (CQR) method

---
 doc/theoretical_description_regression.rst | 44 +++++++++++++---------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst
index ae4b7c346..9479f4645 100644
--- a/doc/theoretical_description_regression.rst
+++ b/doc/theoretical_description_regression.rst
@@ -245,30 +245,38 @@
 uncertainty is higher than :math:`CV+`, because the models' prediction spread
 is then higher.
 
 
-9. The conformalized quantile regression (CQR) method
-=====================================================
+9. The Conformalized Quantile Regression (CQR) Method
+=====================================================
 
-The conformalized quantile method allows for better interval widths with
-heteroscedastic data. It uses quantile regressors with different quantile
-values to estimate the prediction bounds and the residuals of these methods are
-used to create the guaranteed coverage value.
+The conformalized quantile regression (CQR) method allows for better interval widths with
+heteroscedastic data. It uses quantile regressors with different quantile values to estimate
+the prediction bounds. The residuals of these methods are used to create the guaranteed
+coverage value.
 
-.. math::
+Notations and Definitions
+-------------------------
+- :math:`E_i`: Residuals for the i-th sample in the calibration set.
+- :math:`E_{\text{low}}`: Residuals from the lower quantile model.
+- :math:`E_{\text{high}}`: Residuals from the upper quantile model.
+- :math:`Q_{1-\alpha}(E, \mathcal{I}_2)`: The :math:`(1-\alpha)(1+1/|\mathcal{I}_2|)`-th empirical quantile of the set :math:`\{E_i : i \in \mathcal{I}_2\}`, where :math:`\mathcal{I}_2` is the set of indices of the residuals in the calibration set.
+
+Mathematical Formulation
+------------------------
+The prediction interval :math:`\hat{C}_{n, \alpha}^{\text{CQR}}(X_{n+1})` for a new sample :math:`X_{n+1}` is given by:
+
+.. math::
 
-    \hat{C}_{n, \alpha}^{\rm CQR}(X_{n+1}) =
-    [\hat{q}_{\alpha_{lo}}(X_{n+1}) - Q_{1-\alpha}(E_{low}, \mathcal{I}_2),
-    \hat{q}_{\alpha_{hi}}(X_{n+1}) + Q_{1-\alpha}(E_{high}, \mathcal{I}_2)]
+    \hat{C}_{n, \alpha}^{\text{CQR}}(X_{n+1}) =
+    [\hat{q}_{\alpha_{\text{lo}}}(X_{n+1}) - Q_{1-\alpha}(E_{\text{low}}, \mathcal{I}_2),
+    \hat{q}_{\alpha_{\text{hi}}}(X_{n+1}) + Q_{1-\alpha}(E_{\text{high}}, \mathcal{I}_2)]
 
-Where :math:`Q_{1-\alpha}(E, \mathcal{I}_2) := (1-\alpha)(1+1/ |\mathcal{I}_2|)`-th
-empirical quantile of :math:`{E_i : i \in \mathcal{I}_2}` and :math:`\mathcal{I}_2` is the
-residuals of the estimator fitted on the calibration set. Note that in the symmetric method,
-:math:`E_{low}` and :math:`E_{high}` are equal.
+Where:
+- :math:`\hat{q}_{\alpha_{\text{lo}}}(X_{n+1})` is the predicted lower quantile for the new sample.
+- :math:`\hat{q}_{\alpha_{\text{hi}}}(X_{n+1})` is the predicted upper quantile for the new sample.
 
-As justified by [3], this method offers a theoretical guarantee of the target coverage
-level :math:`1-\alpha`.
+Note: In the symmetric method, :math:`E_{\text{low}}` and :math:`E_{\text{high}}` are considered equal.
 
-Note that only the split method has been implemented and that it will run three separate
-regressions when using :class:`mapie.quantile_regression.MapieQuantileRegressor`.
+As justified by the literature, this method offers a theoretical guarantee of the target coverage level :math:`1-\alpha`.
 
 
 10. The ensemble batch prediction intervals (EnbPI) method
+""" +import numpy as np +from matplotlib import pyplot as plt +from sklearn.datasets import make_regression +from sklearn.ensemble import GradientBoostingRegressor + +from mapie.metrics import regression_coverage_score +from mapie.quantile_regression import MapieQuantileRegressor + +# Generate synthetic data +X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59) + +# Define alpha level +alpha = 0.2 + +# Fit a Gradient Boosting Regressor for quantile regression +quantiles = [0.1, 0.9] +gb_reg = GradientBoostingRegressor(loss="quantile", alpha=quantiles[1]) +gb_reg.fit(X, y) + +# MAPIE Quantile Regressor with symmetry=True +mapie_qr_sym = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha) +mapie_qr_sym.fit(X, y) +y_pred_sym, y_pis_sym = mapie_qr_sym.predict(X, symmetry=True) + +# MAPIE Quantile Regressor with symmetry=False +mapie_qr_asym = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha) +mapie_qr_asym.fit(X, y) +y_pred_asym, y_pis_asym = mapie_qr_asym.predict(X, symmetry=False) + +# Calculate coverage scores +coverage_score_sym = regression_coverage_score(y, y_pis_sym[:, 0], y_pis_sym[:, 1]) +coverage_score_asym = regression_coverage_score(y, y_pis_asym[:, 0], y_pis_asym[:, 1]) + +# Sort the values for plotting +order = np.argsort(X[:, 0]) +X_sorted = X[order] +y_pred_sym_sorted = y_pred_sym[order] +y_pis_sym_sorted = y_pis_sym[order] +y_pred_asym_sorted = y_pred_asym[order] +y_pis_asym_sorted = y_pis_asym[order] + +# Plot symmetric prediction intervals +plt.figure(figsize=(14, 7)) + +plt.subplot(1, 2, 1) +plt.xlabel("x") +plt.ylabel("y") +plt.scatter(X, y, alpha=0.3) +plt.plot(X_sorted, y_pred_sym_sorted, color="C1") +plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--") +plt.plot(X_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--") +plt.fill_between( + X_sorted.ravel(), + y_pis_sym_sorted[:, 0].ravel(), + y_pis_sym_sorted[:, 1].ravel(), + alpha=0.2, +) +plt.title( + f"Symmetric Intervals\n" + f"Target and effective coverages for " + f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_sym:.3f})" +) + +# Plot asymmetric prediction intervals +plt.subplot(1, 2, 2) +plt.xlabel("x") +plt.ylabel("y") +plt.scatter(X, y, alpha=0.3) +plt.plot(X_sorted, y_pred_asym_sorted, color="C2") +plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--") +plt.plot(X_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--") +plt.fill_between( + X_sorted.ravel(), + y_pis_asym_sorted[:, 0].ravel(), + y_pis_asym_sorted[:, 1].ravel(), + alpha=0.2, +) +plt.title( + f"Asymmetric Intervals\n" + f"Target and effective coverages for " + f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_asym:.3f})" +) + +plt.tight_layout() +plt.show() + +# Explanation of the results +""" +The symmetric intervals (`symmetry=True`) are easier to interpret and tend to have higher +coverage but might not adapt well to varying noise levels. The asymmetric intervals +(`symmetry=False`) are more flexible and better capture heteroscedasticity but can appear +more jagged. 
+""" From ef282f6c463ae47be2b5514bbd750f1525241a09 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 09:00:55 +0200 Subject: [PATCH 068/128] FIX: linting --- .../1-quickstart/plot_cqr_symmetry_difference.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py index 895838fca..7cc23a3e7 100644 --- a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py +++ b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py @@ -35,8 +35,12 @@ y_pred_asym, y_pis_asym = mapie_qr_asym.predict(X, symmetry=False) # Calculate coverage scores -coverage_score_sym = regression_coverage_score(y, y_pis_sym[:, 0], y_pis_sym[:, 1]) -coverage_score_asym = regression_coverage_score(y, y_pis_asym[:, 0], y_pis_asym[:, 1]) +coverage_score_sym = regression_coverage_score( + y, y_pis_sym[:, 0], y_pis_sym[:, 1] +) +coverage_score_asym = regression_coverage_score( + y, y_pis_asym[:, 0], y_pis_asym[:, 1] +) # Sort the values for plotting order = np.argsort(X[:, 0]) @@ -93,8 +97,8 @@ # Explanation of the results """ -The symmetric intervals (`symmetry=True`) are easier to interpret and tend to have higher -coverage but might not adapt well to varying noise levels. The asymmetric intervals -(`symmetry=False`) are more flexible and better capture heteroscedasticity but can appear -more jagged. +The symmetric intervals (`symmetry=True`) are easier to interpret and +tend to have higher coverage but might not adapt well to varying +noise levels. The asymmetric intervals (`symmetry=False`) are more +flexible and better capture heteroscedasticity but can appear more jagged. """ From 09721e1a8717aa2605ac99ab13482a8ff244d3b8 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 14:47:23 +0200 Subject: [PATCH 069/128] FIX: image size --- README.rst | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index a6b57cbfd..971cd652d 100644 --- a/README.rst +++ b/README.rst @@ -172,23 +172,28 @@ and with the financial support from Région Ile de France and Confiance.ai. |Quantmetry| |Michelin| |ENS| |Confiance.ai| |IledeFrance| .. |Quantmetry| image:: https://www.quantmetry.com/wp-content/uploads/2020/08/08-Logo-quant-Texte-noir.svg - :height: 35 + :height: 35px + :width: 140px :target: https://www.quantmetry.com/ .. |Michelin| image:: https://agngnconpm.cloudimg.io/v7/https://dgaddcosprod.blob.core.windows.net/corporate-production/attachments/cls05tqdd9e0o0tkdghwi9m7n-clooe1x0c3k3x0tlu4cxi6dpn-bibendum-salut.full.png - :height: 35 + :height: 45px + :width: 45px :target: https://www.michelin.com/en/ .. |ENS| image:: https://file.diplomeo-static.com/file/00/00/01/34/13434.svg - :height: 35 + :height: 35px + :width: 140px :target: https://ens-paris-saclay.fr/en .. |Confiance.ai| image:: https://pbs.twimg.com/profile_images/1443838558549258264/EvWlv1Vq_400x400.jpg - :height: 35 + :height: 45px + :width: 45px :target: https://www.confiance.ai/ .. 
|IledeFrance| image:: https://www.iledefrance.fr/sites/default/files/logo/2024-02/logoGagnerok.svg - :height: 35 + :height: 35px + :width: 140px :target: https://www.iledefrance.fr/ From 06e56304193bc668a301249db11e36aed9384bfd Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 14:47:39 +0200 Subject: [PATCH 070/128] FIX: missing ref in metrics.py --- mapie/metrics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mapie/metrics.py b/mapie/metrics.py index e78f02c7c..20c5065f0 100644 --- a/mapie/metrics.py +++ b/mapie/metrics.py @@ -541,6 +541,11 @@ def regression_ssc_score( (intervals of different sizes), with constant intervals the result may be misinterpreted. + [3] Angelopoulos, A. N., & Bates, S. (2021). + A gentle introduction to conformal prediction and + distribution-free uncertainty quantification. + arXiv preprint arXiv:2107.07511. + Parameters ---------- y_true: NDArray of shape (n_samples,) From ee0e17d77952f7285ed5db56fc474c02cad377e6 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 14:47:46 +0200 Subject: [PATCH 071/128] chore: Add METRICS section to table of contents --- doc/index.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/index.rst b/doc/index.rst index d3b00dc18..b5450722b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -58,6 +58,13 @@ examples_calibration/index notebooks_calibration +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: METRICS + + theoretical_description_metrics + .. toctree:: :maxdepth: 2 :hidden: From 64a0299010df1bb2af45b571b21bb7124f63c6a4 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 14:48:09 +0200 Subject: [PATCH 072/128] ADD: theoretical description for metrics --- doc/theoretical_description_metrics.rst | 264 ++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 doc/theoretical_description_metrics.rst diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst new file mode 100644 index 000000000..75a26fc27 --- /dev/null +++ b/doc/theoretical_description_metrics.rst @@ -0,0 +1,264 @@ +.. title:: Theoretical Description : contents + +.. _theoretical_description_metrics: + +================================== +Theoretical Description of Metrics +================================== + +This document provides detailed descriptions of various metrics used to evaluate the performance of predictive models, particularly focusing on their ability to estimate uncertainties and calibrate predictions accurately. + + +1. General metrics +================== + +Regression Coverage Score +------------------------- + +The **Regression Coverage Score** calculates the fraction of true outcomes that fall within the provided prediction intervals. + +.. math:: + + C = \frac{1}{n} \sum_{i=1}^{n} \mathbf{1}(y_{\text{pred, low}}^{(i)} \leq y_{\text{true}}^{(i)} \leq y_{\text{pred, up}}^{(i)}) + +where: + +- :math:`n` is the number of samples, +- :math:`y_{\text{true}}^{(i)}` is the true value for the :math:`i`-th sample, +- :math:`y_{\text{pred, low}}^{(i)}` and :math:`y_{\text{pred, up}}^{(i)}` are the lower and upper bounds of the prediction intervals, respectively. + + +Regression Mean Width Score +--------------------------- + +The **Regression Mean Width Score** assesses the average width of the prediction intervals provided by the model. + +.. 
math:: + + \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} (y_{\text{pred, up}}^{(i)} - y_{\text{pred, low}}^{(i)}) + + +Classification Coverage Score +----------------------------- + +The **Classification Coverage Score** measures how often the true class labels fall within the predicted sets. + +.. math:: + + C = \frac{1}{n} \sum_{i=1}^{n} \mathbf{1}(y_{\text{true}}^{(i)} \in \text{Set}_{\text{pred}}^{(i)}) + +Here, :math:`\text{Set}_{\text{pred}}^{(i)}` represents the set of predicted labels that could possibly contain the true label. + + +Classification Mean Width Score +------------------------------- + +For classification tasks, the **Classification Mean Width Score** calculates the average size of the prediction sets across all samples. + +.. math:: + + \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} |\text{Set}_{\text{pred}}^{(i)}| + +where :math:`|\text{Set}_{\text{pred}}^{(i)}|` denotes the number of classes included in the prediction set for sample :math:`i`. + + +Size-Stratified Coverage (SSC) +------------------------------- + +**Size-Stratified Coverage (SSC)** evaluates how the size of prediction sets or intervals affects their ability to cover the true outcomes [1]. It's calculated separately for classification and regression: + +**Regression:** + +.. math:: + + \text{SSC}_{\text{regression}} = \sum_{k=1}^{K} \left( \frac{1}{|I_k|} \sum_{i \in I_k} \mathbf{1}(y_{\text{pred, low}}^{(i)} \leq y_{\text{true}}^{(i)} \leq y_{\text{pred, up}}^{(i)}) \right) + +**Classification:** + +.. math:: + + \text{SSC}_{\text{classification}} = \sum_{k=1}^{K} \left( \frac{1}{|S_k|} \sum_{i \in S_k} \mathbf{1}(y_{\text{true}}^{(i)} \in \text{Set}_{\text{pred}}^{(i)}) \right) + +where: + +- :math:`K` is the number of distinct size groups, +- :math:`I_k` and :math:`S_k` are the indices of samples whose prediction intervals or sets belong to the :math:`k`-th size group. + + +Hilbert-Schmidt Independence Criterion (HSIC) +---------------------------------------------- + +**Hilbert-Schmidt Independence Criterion (HSIC)** is a non-parametric measure of independence between two variables, applied here to test the independence of interval sizes from their coverage indicators [4]. + +.. math:: + + \text{HSIC} = \operatorname{trace}(\mathbf{H} \mathbf{K} \mathbf{H} \mathbf{L}) + +where: + +- :math:`\mathbf{K}` and :math:`\mathbf{L}` are the kernel matrices representing the interval sizes and coverage indicators, respectively. +- :math:`\mathbf{H}` is the centering matrix, :math:`\mathbf{H} = \mathbf{I} - \frac{1}{n} \mathbf{11}^\top`. + +This measure is crucial for determining whether certain sizes of prediction intervals are systematically more or less likely to contain the true values, which can highlight biases in interval-based predictions. + + +Coverage Width-Based Criterion (CWC) +------------------------------------ + +The **Coverage Width-Based Criterion (CWC)** evaluates prediction intervals by balancing their empirical coverage and width. It is designed to both reward narrow intervals and penalize those that do not achieve a specified coverage probability [6]. + +.. math:: + + \text{CWC} = (1 - \text{Mean Width Score}) \times \exp\left(-\eta \times (\text{Coverage Score} - (1-\alpha))^2\right) + + + +Regression MWI Score +-------------------- + +The **Regression MWI (Mean Winkler Interval) Score** evaluates prediction intervals by combining their width with a penalty for intervals that do not contain the observation [8, 10]. + +.. 
math:: + + \text{MWI Score} = \frac{1}{n} \sum_{i=1}^{n} (y_{\text{pred, up}}^{(i)} - y_{\text{pred, low}}^{(i)}) + \frac{2}{\alpha} \sum_{i=1}^{n} \max(0, |y_{\text{true}}^{(i)} - y_{\text{pred, boundary}}^{(i)}|) + +where :math:`y_{\text{pred, boundary}}^{(i)}` is the nearest interval boundary not containing :math:`y_{\text{true}}^{(i)}`, and :math:`\alpha` is the significance level. + + + +2. Calibration metrics +====================== + +Expected Calibration Error (ECE) +-------------------------------- + +**Expected Calibration Error (ECE)** measures the difference between predicted probabilities of a model and the actual outcomes, across different bins of predicted probabilities [7]. + +.. math:: + + \text{ECE} = \sum_{b=1}^{B} \frac{n_b}{n} | \text{acc}(b) - \text{conf}(b) | + +where: + +- :math:`B` is the total number of bins, +- :math:`n_b` is the number of samples in bin :math:`b`, +- :math:`\text{acc}(b)` is the accuracy within bin :math:`b`, +- :math:`\text{conf}(b)` is the mean predicted probability in bin :math:`b`. + + +Top-Label Expected Calibration Error (Top-Label ECE) +---------------------------------------------------- + +**Top-Label ECE** focuses on the class predicted with the highest confidence for each sample, assessing whether these top-predicted confidences align well with actual outcomes. It is calculated by dividing the confidence score range into bins and comparing the mean confidence against empirical accuracy within these bins [5]. + +.. math:: + + \text{Top-Label ECE} = \sum_{b=1}^{B} \frac{n_b}{n} \left| \text{acc}_b - \text{conf}_b \right| + +where: + +- :math:`n` is the total number of samples, +- :math:`n_b` is the number of samples in bin :math:`b`, +- :math:`\text{acc}_b` is the empirical accuracy in bin :math:`b`, +- :math:`\text{conf}_b` is the average confidence of the top label in bin :math:`b`. + +This metric is especially useful in multi-class classification to ensure that the model is neither overconfident nor underconfident in its predictions. + + +Cumulative Differences +---------------------- + +**Cumulative Differences** calculates the cumulative differences between sorted true values and prediction scores, helping to understand how well the prediction scores correspond to the actual outcomes when both are ordered by the score [2]. + +.. math:: + + \text{Cumulative Differences} = \frac{1}{n} \sum_{i=1}^{n} (y_{\text{true,sorted}}^{(i)} - y_{\text{score,sorted}}^{(i)}) + + +Kolmogorov-Smirnov Statistic for Calibration +-------------------------------------------- + +This statistic measures the maximum absolute deviation between the empirical cumulative distribution function (ECDF) of observed outcomes and predicted probabilities [2, 3, 11]. + +.. math:: + + \text{KS Statistic} = \sup_x |F_n(x) - S_n(x)| + +where :math:`F_n(x)` is the ECDF of the predicted probabilities and :math:`S_n(x)` is the ECDF of the observed outcomes. + + +Kuiper's Statistic +------------------ + +**Kuiper's Statistic** considers both the maximum deviation above and below the mean cumulative difference, making it more sensitive to deviations at the tails of the distribution [2, 3, 11]. + +.. math:: + + \text{Kuiper's Statistic} = \max(F_n(x) - S_n(x)) + \max(S_n(x) - F_n(x)) + + +Spiegelhalter’s Test +-------------------- + +**Spiegelhalter’s Test** assesses the calibration of binary predictions based on a transformation of the Brier score [9]. + +.. 
math:: + + \text{Spiegelhalter's Statistic} = \frac{\sum (y_{\text{true}} - y_{\text{score}})(1 - 2y_{\text{score}})}{\sqrt{\sum (1 - 2y_{\text{score}})^2 y_{\text{score}} (1 - y_{\text{score}})}} + + + +References +========== + +[1] Angelopoulos, A. N., & Bates, S. (2021). +A gentle introduction to conformal prediction and +distribution-free uncertainty quantification. +arXiv preprint arXiv:2107.07511. + +[2] Arrieta-Ibarra I, Gujral P, Tannen J, Tygert M, Xu C. +Metrics of calibration for probabilistic predictions. +The Journal of Machine Learning Research. 2022 Jan 1;23(1):15886-940. + +[3] D. A. Darling. A. J. F. Siegert. +The First Passage Problem for a Continuous Markov Process. +Ann. Math. Statist. 24 (4) 624 - 639, December, 1953. + +[4] Feldman, S., Bates, S., & Romano, Y. (2021). +Improving conditional coverage via orthogonal quantile regression. +Advances in Neural Information Processing Systems, 34, 2060-2071. + +[5] Gupta, Chirag, and Aaditya K. Ramdas. +"Top-label calibration and multiclass-to-binary reductions." +arXiv preprint arXiv:2107.08353 (2021). + +[6] Khosravi, Abbas, Saeid Nahavandi, and Doug Creighton. +"Construction of optimal prediction intervals for load forecasting +problems." +IEEE Transactions on Power Systems 25.3 (2010): 1496-1503. + +[7] Naeini, Mahdi Pakdaman, Gregory Cooper, and Milos Hauskrecht. +"Obtaining well calibrated probabilities using bayesian binning." +Twenty-Ninth AAAI Conference on Artificial Intelligence. 2015. + +[8] Robert L. Winkler +"A Decision-Theoretic Approach to Interval Estimation", +Journal of the American Statistical Association, +volume 67, pages 187-191 (1972) +(https://doi.org/10.1080/01621459.1972.10481224) + +[9] Spiegelhalter DJ. +Probabilistic prediction in patient management and clinical trials. +Statistics in medicine. +1986 Sep;5(5):421-33. + +[10] Tilmann Gneiting and Adrian E Raftery +"Strictly Proper Scoring Rules, Prediction, and Estimation", +Journal of the American Statistical Association, +volume 102, pages 359-378 (2007) +(https://doi.org/10.1198/016214506000001437) (Section 6.2) + +[11] Tygert M. +Calibration of P-values for calibration and for deviation +of a subpopulation from the full population. +arXiv preprint arXiv:2202.00100.2022 Jan 31. 
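
The coverage and width metrics defined above map directly onto helpers exposed in ``mapie.metrics``. A minimal sketch, where the synthetic data and the constant half-width of 1.0 are illustrative assumptions only:

import numpy as np
from mapie.metrics import regression_coverage_score, regression_mean_width_score

# Illustrative data: noisy point predictions around the true values.
rng = np.random.default_rng(42)
y_true = rng.normal(size=200)
y_pred = y_true + rng.normal(scale=0.8, size=200)
y_low, y_up = y_pred - 1.0, y_pred + 1.0  # constant-width intervals

coverage = regression_coverage_score(y_true, y_low, y_up)  # fraction of y_true inside
mean_width = regression_mean_width_score(y_low, y_up)      # equals 2.0 here
print(f"coverage={coverage:.3f}, mean width={mean_width:.1f}")

With heteroscedastic data, a high coverage obtained with a constant width usually signals over-wide intervals, which is what the SSC and HSIC metrics above are designed to detect.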
From 3462250fb4321953f0b9a1e75dff1cb02fc5ff9f Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 18:01:43 +0200 Subject: [PATCH 073/128] chore: Update urllib to requests for downloading data --- .../3-scientific-articles/plot_kim2020_simulations.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py index c1e184d5f..10795aea7 100644 --- a/examples/regression/3-scientific-articles/plot_kim2020_simulations.py +++ b/examples/regression/3-scientific-articles/plot_kim2020_simulations.py @@ -30,10 +30,9 @@ """ from __future__ import annotations -import ssl +import requests from io import BytesIO from typing import Any, Optional, Tuple -from urllib.request import urlopen from zipfile import ZipFile import matplotlib.pyplot as plt @@ -69,9 +68,8 @@ def get_X_y() -> Tuple[NDArray, NDArray]: zip_folder = "BlogFeedback.zip" csv_file = "blogData_train.csv" url = website + page + folder + zip_folder - ssl._create_default_https_context = ssl._create_unverified_context - resp = urlopen(url) - zipfile = ZipFile(BytesIO(resp.read())) + response = requests.get(url) + zipfile = ZipFile(BytesIO(response.content)) df = pd.read_csv(zipfile.open(csv_file)).to_numpy() X = df[:, :-1] y = np.log(1 + df[:, -1]) From e65a08157bdc6b28961f0568b4641fe73cb9c483 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 18:12:25 +0200 Subject: [PATCH 074/128] Update notebook links in regression, classification and multilabel_classification documentation --- doc/notebooks_classification.rst | 8 ++++---- doc/notebooks_multilabel_classification.rst | 8 ++++---- doc/notebooks_regression.rst | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/notebooks_classification.rst b/doc/notebooks_classification.rst index dc25e1ac2..35747de19 100755 --- a/doc/notebooks_classification.rst +++ b/doc/notebooks_classification.rst @@ -6,8 +6,8 @@ problems for computer vision settings that are too heavy to be included in the e galleries. -1. Estimating prediction sets on the Cifar10 dataset : `notebook `_ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- +1. Estimating prediction sets on the Cifar10 dataset : `cifar_notebook `_ +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -2. Top-label calibration for outputs of ML models : `notebook `_ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +2. Top-label calibration for outputs of ML models : `top_label_notebook `_ +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ diff --git a/doc/notebooks_multilabel_classification.rst b/doc/notebooks_multilabel_classification.rst index e9160169b..3826f7ff2 100644 --- a/doc/notebooks_multilabel_classification.rst +++ b/doc/notebooks_multilabel_classification.rst @@ -5,8 +5,8 @@ The following examples present advanced analyses on multi-label classification problems with different methods proposed in MAPIE. -1. 
Overview of Recall Control for Multi-Label Classification : `notebook `_ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +1. Overview of Recall Control for Multi-Label Classification : `recall_notebook `_ +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -2. Overview of Precision Control for Multi-Label Classification : `notebook `_ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \ No newline at end of file +2. Overview of Precision Control for Multi-Label Classification : `precision_notebook `_ +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- \ No newline at end of file diff --git a/doc/notebooks_regression.rst b/doc/notebooks_regression.rst index 4ac493fa8..24b8ce12e 100755 --- a/doc/notebooks_regression.rst +++ b/doc/notebooks_regression.rst @@ -8,11 +8,11 @@ This section lists a series of Jupyter notebooks hosted on the MAPIE Github repo ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -2. Estimating the uncertainties in the exoplanet masses : `notebook `_ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +2. Estimating the uncertainties in the exoplanet masses : `exoplanet_notebook `_ +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -3. Estimating prediction intervals for time series forecast with EnbPI and ACI : `notebook `_ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +3. 
Estimating prediction intervals for time series forecast with EnbPI and ACI : `ts_notebook `_ +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- From 9f8b451c54c7bf48f83dba551449ba490a6cad1a Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 18:12:36 +0200 Subject: [PATCH 075/128] chore: Add verbose mode to LGBMRegressor in plot_cqr_tutorial.py --- examples/regression/4-tutorials/plot_cqr_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/regression/4-tutorials/plot_cqr_tutorial.py b/examples/regression/4-tutorials/plot_cqr_tutorial.py index f370fa78f..5e92e4542 100644 --- a/examples/regression/4-tutorials/plot_cqr_tutorial.py +++ b/examples/regression/4-tutorials/plot_cqr_tutorial.py @@ -121,7 +121,8 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is estimator = LGBMRegressor( objective='quantile', alpha=0.5, - random_state=random_state + random_state=random_state, + verbose=-1 ) params_distributions = dict( num_leaves=randint(low=10, high=50), @@ -135,7 +136,6 @@ class :class:`~mapie.subsample.Subsample` (note that the `alpha` parameter is n_jobs=-1, n_iter=10, cv=KFold(n_splits=5, shuffle=True), - verbose=0, random_state=random_state ) optim_model.fit(X_train, y_train) From c7fd1bc88af9f3b3d12eb690e90ebae7f484be10 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 15 May 2024 18:12:59 +0200 Subject: [PATCH 076/128] Update theoretical description titles to reflect the specific type --- doc/theoretical_description_binary_classification.rst | 2 +- doc/theoretical_description_classification.rst | 4 +++- doc/theoretical_description_conformity_scores.rst | 2 +- doc/theoretical_description_multilabel_classification.rst | 2 +- doc/theoretical_description_regression.rst | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/theoretical_description_binary_classification.rst b/doc/theoretical_description_binary_classification.rst index 877bf83f4..55e2f6144 100644 --- a/doc/theoretical_description_binary_classification.rst +++ b/doc/theoretical_description_binary_classification.rst @@ -1,4 +1,4 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Binary Classification : contents .. _theoretical_description_binay_classification: diff --git a/doc/theoretical_description_classification.rst b/doc/theoretical_description_classification.rst index aa5c08060..a8ef17830 100644 --- a/doc/theoretical_description_classification.rst +++ b/doc/theoretical_description_classification.rst @@ -1,4 +1,4 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Classification : contents .. _theoretical_description_classification: @@ -141,8 +141,10 @@ Despite the RAPS method having a relatively small set size, its coverage tends t of the last label in the prediction set. This randomization is done as follows: - First : define the :math:`V` parameter: + .. math:: V_i = (s_i(X_i, Y_i) - \hat{q}_{1-\alpha}) / \left(\hat{\mu}(X_i)_{\pi_k} + \lambda \mathbb{1} (k > k_{reg})\right) + - Compare each :math:`V_i` to :math:`U \sim` Unif(0, 1) - If :math:`V_i \leq U`, the last included label is removed, else we keep the prediction set as it is. 
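
A minimal NumPy sketch of this randomized tie-breaking step for a single observation; every numeric value below is a made-up placeholder rather than an output of MAPIE:

import numpy as np

rng = np.random.default_rng(0)

# Hypothetical quantities for one observation: cumulative softmax score up
# to the last included label, calibrated quantile, softmax score of that
# label, RAPS regularization (lambda, k_reg) and the rank k of the label.
s_last, q_hat = 0.93, 0.90
p_last, lam, k_reg, k = 0.08, 0.01, 2, 4

v = (s_last - q_hat) / (p_last + lam * (k > k_reg))
u = rng.uniform()
keep_last_label = v > u  # if v <= u, the last label is dropped from the set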
diff --git a/doc/theoretical_description_conformity_scores.rst b/doc/theoretical_description_conformity_scores.rst index b280fc530..8ea72b6ff 100644 --- a/doc/theoretical_description_conformity_scores.rst +++ b/doc/theoretical_description_conformity_scores.rst @@ -1,4 +1,4 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Conformity Scores : contents .. _theoretical_description_conformity_scores: diff --git a/doc/theoretical_description_multilabel_classification.rst b/doc/theoretical_description_multilabel_classification.rst index 23e0536c4..011061e00 100644 --- a/doc/theoretical_description_multilabel_classification.rst +++ b/doc/theoretical_description_multilabel_classification.rst @@ -1,4 +1,4 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Multi label Classification : contents .. _theoretical_description_multilabel_classification: diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst index ae4b7c346..c755975df 100644 --- a/doc/theoretical_description_regression.rst +++ b/doc/theoretical_description_regression.rst @@ -1,4 +1,4 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Regression : contents .. _theoretical_description_regression: From a99cb4d10cb2a761bf0d97b98532e4b91bdebb57 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:26:11 +0200 Subject: [PATCH 077/128] chore: Add citation information to README.rst --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index a6b57cbfd..1c1bdd4bd 100644 --- a/README.rst +++ b/README.rst @@ -224,3 +224,9 @@ and with the financial support from Région Ile de France and Confiance.ai. ========== MAPIE is free and open-source software licensed under the `3-clause BSD license `_. + + +📚 Citation +=========== + +If you use MAPIE in your research, please cite using `citations file `_ on our repository. From 14c6dfb2550b4b2e2bc33516fef7d122fe5076d4 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:29:13 +0200 Subject: [PATCH 078/128] Update doc/theoretical_description_regression.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_regression.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst index 9479f4645..12378f3a1 100644 --- a/doc/theoretical_description_regression.rst +++ b/doc/theoretical_description_regression.rst @@ -255,10 +255,14 @@ coverage value. Notations and Definitions ------------------------- +- :math:`\mathcal{I}_1` is the set of indices of the data in the training set. +- :math:`\mathcal{I}_2` is the set of indices of the data in the calibration set. +- :math:`\hat{q}_{\alpha_{\text{low}}}`: Lower quantile model trained on :math:`{(X_i, Y_i) : i \in \mathcal{I}_1}`. +- :math:`\hat{q}_{\alpha_{\text{high}}}`: Upper quantile model trained on :math:`{(X_i, Y_i) : i \in \mathcal{I}_1}`. - :math:`E_i`: Residuals for the i-th sample in the calibration set. - :math:`E_{\text{low}}`: Residuals from the lower quantile model. - :math:`E_{\text{high}}`: Residuals from the upper quantile model. 
-- :math:`Q_{1-\alpha}(E, \mathcal{I}_2)`: The :math:`(1-\alpha)(1+1/|\mathcal{I}_2|)`-th empirical quantile of the set :math:`{E_i : i \in \mathcal{I}_2}`, where :math:`\mathcal{I}_2` is the set of indices of the residuals in the calibration set. +- :math:`Q_{1-\alpha}(E, \mathcal{I}_2)`: The :math:`(1-\alpha)(1+1/|\mathcal{I}_2|)`-th empirical quantile of the set :math:`{E_i : i \in \mathcal{I}_2}`. Mathematical Formulation ------------------------ From b66a67ac540726023fb92083cc2d183cb58cbfcb Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:29:20 +0200 Subject: [PATCH 079/128] Update doc/theoretical_description_regression.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_regression.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst index 12378f3a1..ffa1368e5 100644 --- a/doc/theoretical_description_regression.rst +++ b/doc/theoretical_description_regression.rst @@ -278,7 +278,7 @@ Where: - :math:`\hat{q}_{\alpha_{\text{lo}}}(X_{n+1})` is the predicted lower quantile for the new sample. - :math:`\hat{q}_{\alpha_{\text{hi}}}(X_{n+1})` is the predicted upper quantile for the new sample. -Note: In the symmetric method, :math:`E_{\text{low}}` and :math:`E_{\text{high}}` are considered equal. +Note: In the symmetric method, :math:`E_{\text{low}}` and :math:`E_{\text{high}}` sets are no longer distinct. We consider directly the union set :math:`E_{\text{all}} = E_{\text{low}} \cup E_{\text{high}}` and the empirical quantile is then calculated on all the absolute (positive) residuals. As justified by the literature, this method offers a theoretical guarantee of the target coverage level :math:`1-\alpha`. From 501bade5c8108dfab6428f86e3e62e00f49e9875 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:35:13 +0200 Subject: [PATCH 080/128] Update theoretical_description_regression.rst --- doc/theoretical_description_regression.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst index ffa1368e5..f616e3c91 100644 --- a/doc/theoretical_description_regression.rst +++ b/doc/theoretical_description_regression.rst @@ -246,7 +246,7 @@ is then higher. 9. The Conformalized Quantile Regression (CQR) Method -================================================== +===================================================== The conformalized quantile regression (CQR) method allows for better interval widths with heteroscedastic data. It uses quantile regressors with different quantile values to estimate @@ -275,6 +275,7 @@ The prediction interval :math:`\hat{C}_{n, \alpha}^{\text{CQR}}(X_{n+1})` for a \hat{q}_{\alpha_{\text{hi}}}(X_{n+1}) + Q_{1-\alpha}(E_{\text{high}}, \mathcal{I}_2)] Where: + - :math:`\hat{q}_{\alpha_{\text{lo}}}(X_{n+1})` is the predicted lower quantile for the new sample. - :math:`\hat{q}_{\alpha_{\text{hi}}}(X_{n+1})` is the predicted upper quantile for the new sample. 
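
To make the construction above concrete, here is a minimal NumPy sketch of the asymmetric interval, assuming ``q_lo`` and ``q_hi`` are already-fitted lower and upper quantile predictors (hypothetical callables) and ``(X_cal, y_cal)`` is the held-out calibration set; ``np.quantile(..., method="higher")`` assumes NumPy 1.22 or later:

import numpy as np

def cqr_interval(q_lo, q_hi, X_cal, y_cal, X_new, alpha=0.2):
    # Residuals of each quantile model on the calibration set I_2.
    e_low = q_lo(X_cal) - y_cal
    e_high = y_cal - q_hi(X_cal)
    # The (1 - alpha)(1 + 1/|I_2|)-th empirical quantile, capped at 1.
    level = min(1.0, (1 - alpha) * (1 + 1 / len(y_cal)))
    correction_low = np.quantile(e_low, level, method="higher")
    correction_high = np.quantile(e_high, level, method="higher")
    return q_lo(X_new) - correction_low, q_hi(X_new) + correction_high

The symmetric variant pools the two residual sets into one before taking the quantile, trading adaptivity for a simpler, single correction applied to both bounds.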
From 051d30cbd4776fa432e4001afcef0ade150d4583 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:35:19 +0200 Subject: [PATCH 081/128] chore: Update plot_cqr_symmetry_difference.py in regression examples --- .../plot_cqr_symmetry_difference.py | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py index 7cc23a3e7..608a5d0db 100644 --- a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py +++ b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py @@ -13,7 +13,9 @@ from mapie.metrics import regression_coverage_score from mapie.quantile_regression import MapieQuantileRegressor -# Generate synthetic data +############################################################################## +# We generate a synthetic data. + X, y = make_regression(n_samples=500, n_features=1, noise=20, random_state=59) # Define alpha level @@ -36,10 +38,10 @@ # Calculate coverage scores coverage_score_sym = regression_coverage_score( - y, y_pis_sym[:, 0], y_pis_sym[:, 1] +y, y_pis_sym[:, 0], y_pis_sym[:, 1] ) coverage_score_asym = regression_coverage_score( - y, y_pis_asym[:, 0], y_pis_asym[:, 1] +y, y_pis_asym[:, 0], y_pis_asym[:, 1] ) # Sort the values for plotting @@ -50,7 +52,12 @@ y_pred_asym_sorted = y_pred_asym[order] y_pis_asym_sorted = y_pis_asym[order] -# Plot symmetric prediction intervals +############################################################################## +# We will plot the predictions and prediction intervals for both symmetric +# and asymmetric intervals. The line represents the predicted values, the +# dashed lines represent the prediction intervals, and the shaded area +# represents the symmetric and asymmetric prediction intervals. + plt.figure(figsize=(14, 7)) plt.subplot(1, 2, 1) @@ -61,15 +68,15 @@ plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--") plt.plot(X_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--") plt.fill_between( - X_sorted.ravel(), - y_pis_sym_sorted[:, 0].ravel(), - y_pis_sym_sorted[:, 1].ravel(), - alpha=0.2, +X_sorted.ravel(), +y_pis_sym_sorted[:, 0].ravel(), +y_pis_sym_sorted[:, 1].ravel(), +alpha=0.2, ) plt.title( - f"Symmetric Intervals\n" - f"Target and effective coverages for " - f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_sym:.3f})" +f"Symmetric Intervals\n" +f"Target and effective coverages for " +f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_sym:.3f})" ) # Plot asymmetric prediction intervals @@ -81,24 +88,21 @@ plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--") plt.plot(X_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--") plt.fill_between( - X_sorted.ravel(), - y_pis_asym_sorted[:, 0].ravel(), - y_pis_asym_sorted[:, 1].ravel(), - alpha=0.2, +X_sorted.ravel(), +y_pis_asym_sorted[:, 0].ravel(), +y_pis_asym_sorted[:, 1].ravel(), +alpha=0.2, ) plt.title( - f"Asymmetric Intervals\n" - f"Target and effective coverages for " - f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_asym:.3f})" +f"Asymmetric Intervals\n" +f"Target and effective coverages for " +f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_asym:.3f})" ) - plt.tight_layout() plt.show() -# Explanation of the results -""" -The symmetric intervals (`symmetry=True`) are easier to interpret and -tend to have higher coverage but might not adapt well to varying -noise levels. 
The asymmetric intervals (`symmetry=False`) are more -flexible and better capture heteroscedasticity but can appear more jagged. -""" +############################################################################## +# The symmetric intervals (`symmetry=True`) are easier to interpret and +# tend to have higher coverage but might not adapt well to varying +# noise levels. The asymmetric intervals (`symmetry=False`) are more +# flexible and better capture heteroscedasticity but can appear more jagged. From 3f1d971237dc718df6e00f54e76a4c533bc5cc15 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:41:10 +0200 Subject: [PATCH 082/128] chore: Add conference paper citation to CITATION.cff --- CITATION.cff | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CITATION.cff b/CITATION.cff index aead3751d..cf54d9290 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -9,6 +9,26 @@ version: 0.8.3 date-released: 2019-04-30 url: "https://github.com/scikit-learn-contrib/MAPIE" preferred-citation: + type: conference-paper + authors: + - family-names: "Cordier" + given-names: "Thibault" + - family-names: "Blot" + given-names: "Vincent" + - family-names: "Lacombe" + given-names: "Louis" + - family-names: "Morzadec" + given-names: "Thomas" + - family-names: "Capitaine" + given-names: "Arnaud" + - family-names: "Brunel" + given-names: "Nicolas" + title: "Flexible and Systematic Uncertainty Estimation with Conformal Prediction via the MAPIE library" + booktitle: "Conformal and Probabilistic Prediction with Applications" + pages: "549--581" + year: 2023 + organization: "PMLR" +old-citation: type: article authors: - family-names: "Taquet" From da15fa2f5dab3897fc92446c075b9cb662206c72 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:42:49 +0200 Subject: [PATCH 083/128] feat: Fix invalid certificate when downloading data --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index bf1572ad4..11a7422bc 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,7 @@ History * Fix conda versionning. * Reduce precision for test in `MapieCalibrator`. +* Fix invalid certificate when downloading data. 0.8.3 (2024-03-01) ------------------ From 16c163233ac2178dbf4f02dfacc17c785fda61d5 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:43:51 +0200 Subject: [PATCH 084/128] Add citations utility to the documentation --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index bf1572ad4..e1249f70f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,7 @@ History * Fix conda versionning. * Reduce precision for test in `MapieCalibrator`. +* Add citations utility to the documentation. 
0.8.3 (2024-03-01) ------------------ From 995e665af2ed8fe7f3846dc25ac7f5345b785dc6 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:45:36 +0200 Subject: [PATCH 085/128] chore: update indentation --- .../plot_cqr_symmetry_difference.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py index 608a5d0db..4d12b6bdf 100644 --- a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py +++ b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py @@ -38,10 +38,10 @@ # Calculate coverage scores coverage_score_sym = regression_coverage_score( -y, y_pis_sym[:, 0], y_pis_sym[:, 1] + y, y_pis_sym[:, 0], y_pis_sym[:, 1] ) coverage_score_asym = regression_coverage_score( -y, y_pis_asym[:, 0], y_pis_asym[:, 1] + y, y_pis_asym[:, 0], y_pis_asym[:, 1] ) # Sort the values for plotting @@ -68,15 +68,15 @@ plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--") plt.plot(X_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--") plt.fill_between( -X_sorted.ravel(), -y_pis_sym_sorted[:, 0].ravel(), -y_pis_sym_sorted[:, 1].ravel(), -alpha=0.2, + X_sorted.ravel(), + y_pis_sym_sorted[:, 0].ravel(), + y_pis_sym_sorted[:, 1].ravel(), + alpha=0.2, ) plt.title( -f"Symmetric Intervals\n" -f"Target and effective coverages for " -f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_sym:.3f})" + f"Symmetric Intervals\n" + f"Target and effective coverages for " + f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_sym:.3f})" ) # Plot asymmetric prediction intervals @@ -88,15 +88,15 @@ plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--") plt.plot(X_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--") plt.fill_between( -X_sorted.ravel(), -y_pis_asym_sorted[:, 0].ravel(), -y_pis_asym_sorted[:, 1].ravel(), -alpha=0.2, + X_sorted.ravel(), + y_pis_asym_sorted[:, 0].ravel(), + y_pis_asym_sorted[:, 1].ravel(), + alpha=0.2, ) plt.title( -f"Asymmetric Intervals\n" -f"Target and effective coverages for " -f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_asym:.3f})" + f"Asymmetric Intervals\n" + f"Target and effective coverages for " + f"alpha={alpha:.2f}: ({1-alpha:.3f}, {coverage_score_asym:.3f})" ) plt.tight_layout() plt.show() From e0c19c8d05016bf7b590f039aab4c83b1f1e0f26 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:46:33 +0200 Subject: [PATCH 086/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 75a26fc27..2458ad967 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -1,4 +1,4 @@ -.. title:: Theoretical Description : contents +.. title:: Theoretical Description Metrics : contents .. 
_theoretical_description_metrics: From e1941903cf85e80ba5adc7f5e786d870826e7d6f Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:46:41 +0200 Subject: [PATCH 087/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 2458ad967..71b2c4685 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -2,9 +2,9 @@ .. _theoretical_description_metrics: -================================== -Theoretical Description of Metrics -================================== +======================= +Theoretical Description +======================= This document provides detailed descriptions of various metrics used to evaluate the performance of predictive models, particularly focusing on their ability to estimate uncertainties and calibrate predictions accurately. From b4a2c382ed7c93b88fc4038c017734f705f8120d Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:46:59 +0200 Subject: [PATCH 088/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 71b2c4685..56d855b6f 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -204,7 +204,7 @@ Spiegelhalter’s Test .. math:: - \text{Spiegelhalter's Statistic} = \frac{\sum (y_{\text{true}} - y_{\text{score}})(1 - 2y_{\text{score}})}{\sqrt{\sum (1 - 2y_{\text{score}})^2 y_{\text{score}} (1 - y_{\text{score}})}} + \text{Spiegelhalter's Statistic} = \frac{\sum_{i=1}^n (y_i - \hat y_i)(1 - 2\hat y_i)}{\sqrt{\sum_{i=1}^n (1 - 2 \hat y_i)^2 \hat y_i (1 - \hat y_i)}} From d5b2d2f751018d7fe7f5dc629830d86ead9b8d06 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:47:08 +0200 Subject: [PATCH 089/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 56d855b6f..d323d5be2 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -116,7 +116,7 @@ The **Coverage Width-Based Criterion (CWC)** evaluates prediction intervals by b Regression MWI Score -------------------- -The **Regression MWI (Mean Winkler Interval) Score** evaluates prediction intervals by combining their width with a penalty for intervals that do not contain the observation [8, 10]. +The **MWI (Mean Winkler Interval) Score** evaluates prediction intervals by combining their width with a penalty for intervals that do not contain the observation [8, 10]. .. 
math::

-   \text{MWI Score} = \frac{1}{n} \sum_{i=1}^{n} (y_{\text{pred, up}}^{(i)} - y_{\text{pred, low}}^{(i)}) + \frac{2}{\alpha} \sum_{i=1}^{n} \max(0, |y_{\text{true}}^{(i)} - y_{\text{pred, boundary}}^{(i)}|)
+   \text{MWI Score} = \frac{1}{n} \sum_{i=1}^{n} \left( (\hat y^{\text{up}}_{i} - \hat y^{\text{low}}_{i}) + \frac{2}{\alpha} \max(0, \hat y^{\text{low}}_{i} - y_{i}) + \frac{2}{\alpha} \max(0, y_{i} - \hat y^{\text{up}}_{i}) \right)

-where :math:`y_{\text{pred, boundary}}^{(i)}` is the nearest interval boundary not containing :math:`y_{\text{true}}^{(i)}`, and :math:`\alpha` is the significance level.
+where the two penalty terms are non-zero only when :math:`y_{i}` falls outside :math:`[\hat y^{\text{low}}_{i}, \hat y^{\text{up}}_{i}]`, and :math:`\alpha` is the significance level.
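
Read per sample, the score is the interval width plus a penalty that grows with the distance to the violated bound. A hedged NumPy sketch of this per-sample view (an illustration of the formula, not the ``mapie.metrics`` implementation itself):

import numpy as np

def winkler_scores(y_true, y_low, y_up, alpha):
    # Interval width plus a 2/alpha penalty for observations falling
    # outside the interval; the penalty is zero when y_true is covered.
    width = y_up - y_low
    below = np.maximum(0, y_low - y_true)
    above = np.maximum(0, y_true - y_up)
    return width + (2 / alpha) * (below + above)

# The MWI score is then the average over the n per-sample scores.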
From dfa2ca6eb3e143ae697ceae933a62c49bdf64ab2 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:47:43 +0200 Subject: [PATCH 093/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index ded3bb734..c19c267b9 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -47,7 +47,7 @@ The **Classification Coverage Score** measures how often the true class labels f CCS = \frac{1}{n} \sum_{i=1}^{n} \mathbf{1}(y_{i} \in \hat C(x_{i})) -Here, :math:`\text{Set}_{\text{pred}}^{(i)}` represents the set of predicted labels that could possibly contain the true label. +Here, :math:`\hat C(x_{i})` represents the set of predicted labels that could possibly contain the true label for the :math:`i`-th observation :math:`x_{i}`. Classification Mean Width Score From e9810ecce8150baabe54ba920a9f60dade19c17c Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:47:55 +0200 Subject: [PATCH 094/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index c19c267b9..5762eb1ee 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -172,7 +172,12 @@ Cumulative Differences .. math:: - \text{Cumulative Differences} = \frac{1}{n} \sum_{i=1}^{n} (y_{\text{true,sorted}}^{(i)} - y_{\text{score,sorted}}^{(i)}) + \text{Cumulative Differences} = \frac{1}{n} \sum_{i=1}^{n} (y_{\sigma_1(i)} - \hat y_{\sigma_2(i)}) + +where: + +- :math:`\sigma_1` is the permutation which sorts all the true values. +- :math:`\sigma_2` is the permutation which sorts all the predicted values. Kolmogorov-Smirnov Statistic for Calibration From 7ad8509d5071d1f992bc3ab246e5db753869e552 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:48:08 +0200 Subject: [PATCH 095/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 5762eb1ee..6488cfe53 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -35,7 +35,7 @@ The **Regression Mean Width Score** assesses the average width of the prediction .. 
math:: - \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} (y_{\text{pred, up}}^{(i)} - y_{\text{pred, low}}^{(i)}) + \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} (\hat y^{\text{up}}_{i} - \hat y^{\text{low}}_{i}) Classification Coverage Score From 04531d199f45787b85256f6068925fc1f1ef3dd1 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:48:25 +0200 Subject: [PATCH 096/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 6488cfe53..664777f9d 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -24,8 +24,8 @@ The **Regression Coverage Score** calculates the fraction of true outcomes that where: - :math:`n` is the number of samples, -- :math:`y_{\text{true}}^{(i)}` is the true value for the :math:`i`-th sample, -- :math:`y_{\text{pred, low}}^{(i)}` and :math:`y_{\text{pred, up}}^{(i)}` are the lower and upper bounds of the prediction intervals, respectively. +- :math:`y_{i}` is the true value for the :math:`i`-th sample, +- :math:`\hat y^{\text{low}}_{i}` and :math:`\hat y^{\text{up}}_{i}` are the lower and upper bounds of the prediction intervals, respectively. Regression Mean Width Score From 8f0c08137bb36b328846c0dff614075cbf1f990b Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:48:35 +0200 Subject: [PATCH 097/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 664777f9d..76ebe2138 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -57,9 +57,9 @@ For classification tasks, the **Classification Mean Width Score** calculates the .. math:: - \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} |\text{Set}_{\text{pred}}^{(i)}| + \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} |\hat C_{x_i}| -where :math:`|\text{Set}_{\text{pred}}^{(i)}|` denotes the number of classes included in the prediction set for sample :math:`i`. +where :math:`|\hat C_{x_i}|` denotes the number of classes included in the prediction set for sample :math:`i`. Size-Stratified Coverage (SSC) From eca3e52a951691f204e45a68669d111d59c9b251 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:49:16 +0200 Subject: [PATCH 098/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 76ebe2138..c3aea8837 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -113,7 +113,7 @@ The **Coverage Width-Based Criterion (CWC)** evaluates prediction intervals by b -Regression MWI Score +Mean Winkler Interval Score -------------------- The **MWI (Mean Winkler Interval) Score** evaluates prediction intervals by combining their width with a penalty for intervals that do not contain the observation [8, 10]. 
From 71a0e4673a91bbaeaccbcedd708d46c33148ea1a Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:49:27 +0200 Subject: [PATCH 099/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index c3aea8837..988f19de7 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -71,7 +71,7 @@ Size-Stratified Coverage (SSC) .. math:: - \text{SSC}_{\text{regression}} = \sum_{k=1}^{K} \left( \frac{1}{|I_k|} \sum_{i \in I_k} \mathbf{1}(y_{\text{pred, low}}^{(i)} \leq y_{\text{true}}^{(i)} \leq y_{\text{pred, up}}^{(i)}) \right) + \text{SSC}_{\text{regression}} = \sum_{k=1}^{K} \left( \frac{1}{|I_k|} \sum_{i \in I_k} \mathbf{1}(\hat y^{\text{low}}_{i} \leq y_{i} \leq \hat y^{\text{up}}_{i}) \right) **Classification:** From 9d98b0df7bcec1af8c0f9ced4d760c48aee336ab Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:49:36 +0200 Subject: [PATCH 100/128] Update doc/theoretical_description_metrics.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 988f19de7..0046a41be 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -77,7 +77,7 @@ Size-Stratified Coverage (SSC) .. math:: - \text{SSC}_{\text{classification}} = \sum_{k=1}^{K} \left( \frac{1}{|S_k|} \sum_{i \in S_k} \mathbf{1}(y_{\text{true}}^{(i)} \in \text{Set}_{\text{pred}}^{(i)}) \right) + \text{SSC}_{\text{classification}} = \sum_{k=1}^{K} \left( \frac{1}{|S_k|} \sum_{i \in S_k} \mathbf{1}(y_{i} \in \hat C(x_i)) \right) where: From 009ad1575e8a07d1d5b307151ec34de296799e4a Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 17:51:03 +0200 Subject: [PATCH 101/128] Update Michelin image size in README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 971cd652d..37fb6434e 100644 --- a/README.rst +++ b/README.rst @@ -177,7 +177,7 @@ and with the financial support from Région Ile de France and Confiance.ai. :target: https://www.quantmetry.com/ .. |Michelin| image:: https://agngnconpm.cloudimg.io/v7/https://dgaddcosprod.blob.core.windows.net/corporate-production/attachments/cls05tqdd9e0o0tkdghwi9m7n-clooe1x0c3k3x0tlu4cxi6dpn-bibendum-salut.full.png - :height: 45px + :height: 50px :width: 45px :target: https://www.michelin.com/en/ From e2dcf3e80843725ca64273f6677dfdaf7f56d214 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 18:09:14 +0200 Subject: [PATCH 102/128] Update theoretical_description_metrics.rst with ECE and Top-Label ECE metrics --- doc/theoretical_description_metrics.rst | 41 +++++++++++++++---------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 0046a41be..82c7f9166 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -129,40 +129,47 @@ where :math:`\hat y^{\text{boundary}}_{i}` is the nearest interval boundary not 2. 
Calibration metrics ====================== + Expected Calibration Error (ECE) -------------------------------- -**Expected Calibration Error (ECE)** measures the difference between predicted probabilities of a model and the actual outcomes, across different bins of predicted probabilities [7]. +The **Expected Calibration Error** (ECE) is a metric used to evaluate how well the predicted probabilities of a model align with the actual outcomes. The ECE provides a measure of the difference between predicted confidence levels and actual accuracy. The idea is to divide the predictions into bins based on confidence scores and then compare the accuracy within each bin to the average confidence level of the predictions in that bin. +The ECE is calculated as follows: .. math:: - - \text{ECE} = \sum_{b=1}^{B} \frac{n_b}{n} | \text{acc}(b) - \text{conf}(b) | + \text{ECE} = \sum_{i=1}^B \frac{|B_i|}{n} \left| \text{acc}(B_i) - \text{conf}(B_i) \right| where: +- :math:`B_i` is the set of indices of samples that fall into the i-th bin. +- :math:`|B_i|` is the number of samples in the i-th bin. +- :math:`n` is the total number of samples. +- :math:`\text{acc}(B_i)` is the accuracy within the i-th bin. +- :math:`\text{conf}(B_i)` is the average confidence score within the i-th bin. +- :math:`B` is the total number of bins. -- :math:`B` is the total number of bins, -- :math:`n_b` is the number of samples in bin :math:`b`, -- :math:`\text{acc}(b)` is the accuracy within bin :math:`b`, -- :math:`\text{conf}(b)` is the mean predicted probability in bin :math:`b`. +The difference between the average confidence and the actual accuracy within each bin is weighted by the proportion of samples in that bin, ensuring that bins with more samples have a larger influence on the final ECE value. Top-Label Expected Calibration Error (Top-Label ECE) ---------------------------------------------------- -**Top-Label ECE** focuses on the class predicted with the highest confidence for each sample, assessing whether these top-predicted confidences align well with actual outcomes. It is calculated by dividing the confidence score range into bins and comparing the mean confidence against empirical accuracy within these bins [5]. +The **Top-Label Expected Calibration Error** (Top-Label ECE) extends the concept of ECE to the multi-class setting. Instead of evaluating calibration over all predicted probabilities, Top-Label ECE focuses on the calibration of the most confident prediction (top-label) for each sample. -.. math:: +The Top-Label ECE is calculated as follows: - \text{Top-Label ECE} = \sum_{b=1}^{B} \frac{n_b}{n} \left| \text{acc}_b - \text{conf}_b \right| +.. math:: + \text{Top-Label ECE} = \frac{1}{L} \sum_{j=1}^L \sum_{i=1}^B \frac{|B_{i,j}|}{n_j} \left| \text{acc}(B_{i,j}) - \text{conf}(B_{i,j}) \right| where: - -- :math:`n` is the total number of samples, -- :math:`n_b` is the number of samples in bin :math:`b`, -- :math:`\text{acc}_b` is the empirical accuracy in bin :math:`b`, -- :math:`\text{conf}_b` is the average confidence of the top label in bin :math:`b`. - -This metric is especially useful in multi-class classification to ensure that the model is neither overconfident nor underconfident in its predictions. +- :math:`L` is the number of unique labels. +- :math:`B_{i,j}` is the set of indices of samples that fall into the i-th bin for label j. +- :math:`|B_{i,j}|` is the number of samples in the i-th bin for label j. +- :math:`n_j` is the total number of samples for label j. 
+- :math:`\text{acc}(B_{i,j})` is the accuracy within the i-th bin for label j. +- :math:`\text{conf}(B_{i,j})` is the average confidence score within the i-th bin for label j. +- :math:`B` is the total number of bins. + +For each label, the predictions are binned according to their confidence scores for that label. The calibration error is then calculated for each label separately and averaged across all labels to obtain the final Top-Label ECE value. This ensures that the calibration is measured specifically for the most confident prediction, which is often the most critical for decision-making in multi-class problems. Cumulative Differences From eaaff00c48993876465a7ff6e928ad21204ad23f Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 18:12:58 +0200 Subject: [PATCH 103/128] FIX: fix small issues with documentation --- doc/theoretical_description_metrics.rst | 33 +++++-------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 82c7f9166..ed624d919 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -2,20 +2,18 @@ .. _theoretical_description_metrics: -======================= Theoretical Description ======================= This document provides detailed descriptions of various metrics used to evaluate the performance of predictive models, particularly focusing on their ability to estimate uncertainties and calibrate predictions accurately. - -1. General metrics +1. General Metrics ================== Regression Coverage Score ------------------------- -The **Regression Coverage Score** calculates the fraction of true outcomes that fall within the provided prediction intervals. +The **Regression Coverage Score** calculates the fraction of true outcomes that fall within the provided prediction intervals. .. math:: @@ -27,7 +25,6 @@ where: - :math:`y_{i}` is the true value for the :math:`i`-th sample, - :math:`\hat y^{\text{low}}_{i}` and :math:`\hat y^{\text{up}}_{i}` are the lower and upper bounds of the prediction intervals, respectively. - Regression Mean Width Score --------------------------- @@ -37,7 +34,6 @@ The **Regression Mean Width Score** assesses the average width of the prediction \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} (\hat y^{\text{up}}_{i} - \hat y^{\text{low}}_{i}) - Classification Coverage Score ----------------------------- @@ -49,7 +45,6 @@ The **Classification Coverage Score** measures how often the true class labels f Here, :math:`\hat C(x_{i})` represents the set of predicted labels that could possibly contain the true label for the :math:`i`-th observation :math:`x_{i}`. - Classification Mean Width Score ------------------------------- @@ -61,7 +56,6 @@ For classification tasks, the **Classification Mean Width Score** calculates the where :math:`|\hat C_{x_i}|` denotes the number of classes included in the prediction set for sample :math:`i`. - Size-Stratified Coverage (SSC) ------------------------------- @@ -84,11 +78,10 @@ where: - :math:`K` is the number of distinct size groups, - :math:`I_k` and :math:`S_k` are the indices of samples whose prediction intervals or sets belong to the :math:`k`-th size group. 
- Hilbert-Schmidt Independence Criterion (HSIC) ---------------------------------------------- -**Hilbert-Schmidt Independence Criterion (HSIC)** is a non-parametric measure of independence between two variables, applied here to test the independence of interval sizes from their coverage indicators [4]. +The **Hilbert-Schmidt Independence Criterion (HSIC)** is a non-parametric measure of independence between two variables, applied here to test the independence of interval sizes from their coverage indicators [4]. .. math:: @@ -101,7 +94,6 @@ where: This measure is crucial for determining whether certain sizes of prediction intervals are systematically more or less likely to contain the true values, which can highlight biases in interval-based predictions. - Coverage Width-Based Criterion (CWC) ------------------------------------ @@ -111,12 +103,10 @@ The **Coverage Width-Based Criterion (CWC)** evaluates prediction intervals by b \text{CWC} = (1 - \text{Mean Width Score}) \times \exp\left(-\eta \times (\text{Coverage Score} - (1-\alpha))^2\right) - - Mean Winkler Interval Score --------------------- +--------------------------- -The **MWI (Mean Winkler Interval) Score** evaluates prediction intervals by combining their width with a penalty for intervals that do not contain the observation [8, 10]. +The **Mean Winkler Interval (MWI) Score** evaluates prediction intervals by combining their width with a penalty for intervals that do not contain the observation [8, 10]. .. math:: @@ -124,17 +114,13 @@ The **MWI (Mean Winkler Interval) Score** evaluates prediction intervals by comb where :math:`\hat y^{\text{boundary}}_{i}` is the nearest interval boundary not containing :math:`y_{i}`, and :math:`\alpha` is the significance level. - - -2. Calibration metrics +2. Calibration Metrics ====================== - Expected Calibration Error (ECE) -------------------------------- The **Expected Calibration Error** (ECE) is a metric used to evaluate how well the predicted probabilities of a model align with the actual outcomes. The ECE provides a measure of the difference between predicted confidence levels and actual accuracy. The idea is to divide the predictions into bins based on confidence scores and then compare the accuracy within each bin to the average confidence level of the predictions in that bin. -The ECE is calculated as follows: .. math:: \text{ECE} = \sum_{i=1}^B \frac{|B_i|}{n} \left| \text{acc}(B_i) - \text{conf}(B_i) \right| @@ -149,7 +135,6 @@ where: The difference between the average confidence and the actual accuracy within each bin is weighted by the proportion of samples in that bin, ensuring that bins with more samples have a larger influence on the final ECE value. - Top-Label Expected Calibration Error (Top-Label ECE) ---------------------------------------------------- @@ -171,7 +156,6 @@ where: For each label, the predictions are binned according to their confidence scores for that label. The calibration error is then calculated for each label separately and averaged across all labels to obtain the final Top-Label ECE value. This ensures that the calibration is measured specifically for the most confident prediction, which is often the most critical for decision-making in multi-class problems. - Cumulative Differences ---------------------- @@ -186,7 +170,6 @@ where: - :math:`\sigma_1` is the permutation which sorts all the true values. - :math:`\sigma_2` is the permutation which sorts all the predicted values. 
- Kolmogorov-Smirnov Statistic for Calibration -------------------------------------------- @@ -198,7 +181,6 @@ This statistic measures the maximum absolute deviation between the empirical cum where :math:`F_n(x)` is the ECDF of the predicted probabilities and :math:`S_n(x)` is the ECDF of the observed outcomes. - Kuiper's Statistic ------------------ @@ -208,7 +190,6 @@ Kuiper's Statistic \text{Kuiper's Statistic} = \max(F_n(x) - S_n(x)) + \max(S_n(x) - F_n(x)) - Spiegelhalter’s Test -------------------- @@ -218,8 +199,6 @@ Spiegelhalter’s Test \text{Spiegelhalter's Statistic} = \frac{\sum_{i=1}^n (y_i - \hat y_i)(1 - 2\hat y_i)}{\sqrt{\sum_{i=1}^n (1 - 2 \hat y_i)^2 \hat y_i (1 - \hat y_i)}} - - References ========== From ee62fda24e7634366efd0b54fcefad09911b2c00 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 16 May 2024 18:13:52 +0200 Subject: [PATCH 104/128] Add documentation for metrics. --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index bf1572ad4..23cf7f3fd 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,7 @@ History * Fix conda versionning. * Reduce precision for test in `MapieCalibrator`. +* Add documentation for metrics. 0.8.3 (2024-03-01) ------------------ From d86006f30f84973d555597e1b3db3cb88e25e368 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 10:46:34 +0200 Subject: [PATCH 105/128] Apply suggestions from TCO from code review Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index ed624d919..f9adfbe9a 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -13,7 +13,7 @@ This document provides detailed descriptions of various metrics used to evaluate Regression Coverage Score ------------------------- -The **Regression Coverage Score** calculates the fraction of true outcomes that fall within the provided prediction intervals. +The **Regression Coverage Score (RCS)** calculates the fraction of true outcomes that fall within the provided prediction intervals. .. math:: @@ -28,16 +28,16 @@ where: Regression Mean Width Score --------------------------- -The **Regression Mean Width Score** assesses the average width of the prediction intervals provided by the model. +The **Regression Mean Width Score (RMWS)** assesses the average width of the prediction intervals provided by the model. .. math:: - \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} (\hat y^{\text{up}}_{i} - \hat y^{\text{low}}_{i}) + \text{RMWS} = \frac{1}{n} \sum_{i=1}^{n} (\hat y^{\text{up}}_{i} - \hat y^{\text{low}}_{i}) Classification Coverage Score ----------------------------- -The **Classification Coverage Score** measures how often the true class labels fall within the predicted sets. +The **Classification Coverage Score (CCS)** measures how often the true class labels fall within the predicted sets. .. math:: @@ -48,16 +48,16 @@ Here, :math:`\hat C(x_{i})` represents the set of predicted labels that could po Classification Mean Width Score ------------------------------- -For classification tasks, the **Classification Mean Width Score** calculates the average size of the prediction sets across all samples. 
+For classification tasks, the **Classification Mean Width Score (CMWS)** calculates the average size of the prediction sets across all samples. .. math:: - \text{Mean Width} = \frac{1}{n} \sum_{i=1}^{n} |\hat C_{x_i}| + \text{CMWS} = \frac{1}{n} \sum_{i=1}^{n} |\hat C(x_i)| -where :math:`|\hat C_{x_i}|` denotes the number of classes included in the prediction set for sample :math:`i`. +where :math:`|\hat C(x_i)|` denotes the number of classes included in the prediction set for sample :math:`i`. -Size-Stratified Coverage (SSC) -------------------------------- +Size-Stratified Coverage +------------------------- **Size-Stratified Coverage (SSC)** evaluates how the size of prediction sets or intervals affects their ability to cover the true outcomes [1]. It's calculated separately for classification and regression: @@ -78,8 +78,8 @@ where: - :math:`K` is the number of distinct size groups, - :math:`I_k` and :math:`S_k` are the indices of samples whose prediction intervals or sets belong to the :math:`k`-th size group. -Hilbert-Schmidt Independence Criterion (HSIC) ----------------------------------------------- +Hilbert-Schmidt Independence Criterion +--------------------------------------- The **Hilbert-Schmidt Independence Criterion (HSIC)** is a non-parametric measure of independence between two variables, applied here to test the independence of interval sizes from their coverage indicators [4]. @@ -94,8 +94,8 @@ where: This measure is crucial for determining whether certain sizes of prediction intervals are systematically more or less likely to contain the true values, which can highlight biases in interval-based predictions. -Coverage Width-Based Criterion (CWC) ------------------------------------- +Coverage Width-Based Criterion +------------------------------ The **Coverage Width-Based Criterion (CWC)** evaluates prediction intervals by balancing their empirical coverage and width. It is designed to both reward narrow intervals and penalize those that do not achieve a specified coverage probability [6]. @@ -117,8 +117,8 @@ where :math:`\hat y^{\text{boundary}}_{i}` is the nearest interval boundary not 2. Calibration Metrics ====================== -Expected Calibration Error (ECE) --------------------------------- +Expected Calibration Error +-------------------------- The **Expected Calibration Error** (ECE) is a metric used to evaluate how well the predicted probabilities of a model align with the actual outcomes. The ECE provides a measure of the difference between predicted confidence levels and actual accuracy. The idea is to divide the predictions into bins based on confidence scores and then compare the accuracy within each bin to the average confidence level of the predictions in that bin. From b4c5ecedbf996333f3ede6c1807bb7b66bb2738e Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 11:04:45 +0200 Subject: [PATCH 106/128] Update README.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 1c1bdd4bd..224f6b274 100644 --- a/README.rst +++ b/README.rst @@ -229,4 +229,4 @@ MAPIE is free and open-source software licensed under the `3-clause BSD license 📚 Citation =========== -If you use MAPIE in your research, please cite using `citations file `_ on our repository. +If you use MAPIE in your research, please cite using `citations file `_ on our repository. 
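On the Coverage Width-Based Criterion whose section is renamed above: the formula is simple enough to sanity-check directly. In this sketch, the value of ``eta`` and the rescaling of the mean width to :math:`[0, 1]` are assumptions of the illustration, not prescriptions of the library::

    import numpy as np

    def cwc(coverage, mean_width, alpha, eta=30.0):
        # (1 - Mean Width Score) * exp(-eta * (Coverage Score - (1 - alpha))**2),
        # with mean_width assumed to be rescaled to [0, 1].
        return (1.0 - mean_width) * np.exp(-eta * (coverage - (1.0 - alpha)) ** 2)

    # Narrow intervals that hit the nominal coverage score highest:
    print(cwc(coverage=0.91, mean_width=0.30, alpha=0.1))  # ~0.698
    print(cwc(coverage=0.70, mean_width=0.10, alpha=0.1))  # ~0.271, under-coverage is penalised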
From b4e04e6280bfb8a13049507a784c4112cd6293dd Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 11:06:19 +0200 Subject: [PATCH 107/128] Update link to citation file in README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 224f6b274..5657f7ddf 100644 --- a/README.rst +++ b/README.rst @@ -229,4 +229,4 @@ MAPIE is free and open-source software licensed under the `3-clause BSD license 📚 Citation =========== -If you use MAPIE in your research, please cite using `citations file `_ on our repository. +If you use MAPIE in your research, please cite using `citations file `_ on our repository. From 4d4974cf5c63985f990e8d1867ba946ff3cf1a4a Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 11:07:43 +0200 Subject: [PATCH 108/128] Update LICENSE in README.rst Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 5657f7ddf..406a60c99 100644 --- a/README.rst +++ b/README.rst @@ -223,7 +223,7 @@ and with the financial support from Région Ile de France and Confiance.ai. 📝 License ========== -MAPIE is free and open-source software licensed under the `3-clause BSD license `_. +MAPIE is free and open-source software licensed under the `3-clause BSD license `_. 📚 Citation From 4ee62189bbdb869d0ab4cafeaa90f90050289b3c Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 11:43:43 +0200 Subject: [PATCH 109/128] Apply suggestions from Thibault from code review Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- .../plot_cqr_symmetry_difference.py | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py index 4d12b6bdf..13f827a90 100644 --- a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py +++ b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py @@ -26,15 +26,13 @@ gb_reg = GradientBoostingRegressor(loss="quantile", alpha=quantiles[1]) gb_reg.fit(X, y) -# MAPIE Quantile Regressor with symmetry=True -mapie_qr_sym = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha) -mapie_qr_sym.fit(X, y) -y_pred_sym, y_pis_sym = mapie_qr_sym.predict(X, symmetry=True) - -# MAPIE Quantile Regressor with symmetry=False -mapie_qr_asym = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha) -mapie_qr_asym.fit(X, y) -y_pred_asym, y_pis_asym = mapie_qr_asym.predict(X, symmetry=False) +# MAPIE Quantile Regressor +mapie_qr = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha) +mapie_qr.fit(X, y) +y_pred_sym, y_pis_sym = mapie_qr.predict(X, symmetry=True) +y_pred_asym, y_pis_asym = mapie_qr.predict(X, symmetry=False) +y_qlow = mapie_qr.estimators_[0].predict(X) +y_qup = mapie_qr.estimators_[1].predict(X) # Calculate coverage scores coverage_score_sym = regression_coverage_score( @@ -51,6 +49,8 @@ y_pis_sym_sorted = y_pis_sym[order] y_pred_asym_sorted = y_pred_asym[order] y_pis_asym_sorted = y_pis_asym[order] +y_qlow = y_qlow[order] +y_qup = y_qup[order] ############################################################################## # We will plot the predictions and prediction intervals for both symmetric @@ -64,7 +64,9 @@ plt.xlabel("x") plt.ylabel("y") plt.scatter(X, y, alpha=0.3) -plt.plot(X_sorted, y_pred_sym_sorted, color="C1") +#plt.plot(X_sorted, 
y_pred_sym_sorted, color="C1") +plt.plot(X_sorted, y_qlow, color="C1") +plt.plot(X_sorted, y_qup, color="C1") plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--") plt.plot(X_sorted, y_pis_sym_sorted[:, 1], color="C1", ls="--") plt.fill_between( @@ -84,7 +86,9 @@ plt.xlabel("x") plt.ylabel("y") plt.scatter(X, y, alpha=0.3) -plt.plot(X_sorted, y_pred_asym_sorted, color="C2") +#plt.plot(X_sorted, y_pred_asym_sorted, color="C2") +plt.plot(X_sorted, y_qlow, color="C2") +plt.plot(X_sorted, y_qup, color="C2") plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--") plt.plot(X_sorted, y_pis_asym_sorted[:, 1], color="C2", ls="--") plt.fill_between( From b6ef8572e4e0a45cb26638f73e57a4377e86694a Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 12:18:08 +0200 Subject: [PATCH 110/128] Update CITATION.cff to add booktitle --- CITATION.cff | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index cf54d9290..e22cd764d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -10,6 +10,7 @@ date-released: 2019-04-30 url: "https://github.com/scikit-learn-contrib/MAPIE" preferred-citation: type: conference-paper + title: "Flexible and Systematic Uncertainty Estimation with Conformal Prediction via the MAPIE library" authors: - family-names: "Cordier" given-names: "Thibault" @@ -23,8 +24,8 @@ preferred-citation: given-names: "Arnaud" - family-names: "Brunel" given-names: "Nicolas" - title: "Flexible and Systematic Uncertainty Estimation with Conformal Prediction via the MAPIE library" - booktitle: "Conformal and Probabilistic Prediction with Applications" + collection-title: "Conformal and Probabilistic Prediction with Applications" + collection-type: proceedings pages: "549--581" year: 2023 organization: "PMLR" @@ -44,4 +45,4 @@ old-citation: doi: "10.48550/arXiv.2207.12274" journal: "arXiv preprint arXiv:2207.12274" title: "MAPIE: an open-source library for distribution-free uncertainty quantification" - year: 2021 \ No newline at end of file + year: 2021 From 88adb7364ab1434fdf736af28acca5bda9b69b39 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 12:22:16 +0200 Subject: [PATCH 111/128] Update links to github page in README.rst --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 406a60c99..b02be7484 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ :target: https://mapie.readthedocs.io/en/stable/?badge=stable :alt: Documentation Status -.. |License| image:: https://img.shields.io/github/license/simai-ml/MAPIE +.. |License| image:: https://img.shields.io/github/license/scikit-learn-contrib/MAPIE :target: https://github.com/scikit-learn-contrib/MAPIE/blob/master/LICENSE .. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/mapie @@ -33,7 +33,7 @@ .. |DOI| image:: https://img.shields.io/badge/10.48550/arXiv.2207.12274-B31B1B.svg :target: https://arxiv.org/abs/2207.12274 -.. image:: https://github.com/simai-ml/MAPIE/raw/master/doc/images/mapie_logo_nobg_cut.png +.. image:: https://github.com/scikit-learn-contrib/MAPIE/raw/master/doc/images/mapie_logo_nobg_cut.png :width: 400 :align: center @@ -158,7 +158,7 @@ The full documentation can be found `on this link `_ so that we can align on the work to be done. +We encourage you to `open an issue `_ so that we can align on the work to be done. It is generally a good idea to have a quick discussion before opening a pull request that is potentially out-of-scope. 
 For more information on the contribution process, please go `here `_.

From 5cc1e6f1cec4b5e564ac0c5a5ea70f6254eea698 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Tue, 21 May 2024 12:26:32 +0200
Subject: [PATCH 112/128] Update maxdepth for metrics documentation index.rst

---
 doc/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/index.rst b/doc/index.rst
index b5450722b..53172ca43 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -59,7 +59,7 @@
    notebooks_calibration
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :hidden:
    :caption: METRICS
 

From 454cd4ebc14fb029fa1f6e512ea339a438db568c Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Tue, 21 May 2024 14:10:18 +0200
Subject: [PATCH 113/128] FIX: small issues in plot_cqr_symmetry_difference.py
 in regression examples

---
 .../plot_cqr_symmetry_difference.py | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py
index 13f827a90..4455c27dd 100644
--- a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py
+++ b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py
@@ -13,6 +13,8 @@
 from mapie.metrics import regression_coverage_score
 from mapie.quantile_regression import MapieQuantileRegressor
 
+random_state = 2
+
 ##############################################################################
 # We generate a synthetic data.
 
@@ -22,13 +24,13 @@
 alpha = 0.2
 
 # Fit a Gradient Boosting Regressor for quantile regression
-quantiles = [0.1, 0.9]
-gb_reg = GradientBoostingRegressor(loss="quantile", alpha=quantiles[1])
-gb_reg.fit(X, y)
+gb_reg = GradientBoostingRegressor(
+    loss="quantile", alpha=0.5, random_state=random_state
+)
 
 # MAPIE Quantile Regressor
 mapie_qr = MapieQuantileRegressor(estimator=gb_reg, alpha=alpha)
-mapie_qr.fit(X, y)
+mapie_qr.fit(X, y, random_state=random_state)
 y_pred_sym, y_pis_sym = mapie_qr.predict(X, symmetry=True)
 y_pred_asym, y_pis_asym = mapie_qr.predict(X, symmetry=False)
 y_qlow = mapie_qr.estimators_[0].predict(X)
@@ -64,7 +66,6 @@
 plt.xlabel("x")
 plt.ylabel("y")
 plt.scatter(X, y, alpha=0.3)
-#plt.plot(X_sorted, y_pred_sym_sorted, color="C1")
 plt.plot(X_sorted, y_qlow, color="C1")
 plt.plot(X_sorted, y_qup, color="C1")
 plt.plot(X_sorted, y_pis_sym_sorted[:, 0], color="C1", ls="--")
@@ -87,7 +89,6 @@
 plt.xlabel("x")
 plt.ylabel("y")
 plt.scatter(X, y, alpha=0.3)
-#plt.plot(X_sorted, y_pred_asym_sorted, color="C2")
 plt.plot(X_sorted, y_qlow, color="C2")
 plt.plot(X_sorted, y_qup, color="C2")
 plt.plot(X_sorted, y_pis_asym_sorted[:, 0], color="C2", ls="--")
@@ -106,7 +106,9 @@
 plt.show()
 
 ##############################################################################
-# The symmetric intervals (`symmetry=True`) are easier to interpret and
-# tend to have higher coverage but might not adapt well to varying
-# noise levels. The asymmetric intervals (`symmetry=False`) are more
-# flexible and better capture heteroscedasticity but can appear more jagged.
+# The symmetric intervals (`symmetry=True`) use a combined set of residuals
+# for both bounds, while the asymmetric intervals use distinct residuals for
+# each bound, allowing for more flexible and accurate intervals that reflect
+# the heteroscedastic nature of the data. The resulting effective coverages
+# illustrate the theoretical coverage guarantee at the target level
+# :math:`(1 - \alpha)`.
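A natural follow-up to the example finalised in the patch above is to compare the average interval width of the two settings. The snippet below reuses the variables defined in ``plot_cqr_symmetry_difference.py`` and assumes that ``regression_mean_width_score`` is available in your version of ``mapie.metrics``::

    from mapie.metrics import regression_mean_width_score

    # The mean widths summarise the global trade-off between the symmetric
    # and asymmetric conformalisation of the quantile regressor.
    width_sym = regression_mean_width_score(y_pis_sym[:, 0], y_pis_sym[:, 1])
    width_asym = regression_mean_width_score(y_pis_asym[:, 0], y_pis_asym[:, 1])
    print(f"Mean width (symmetry=True):  {width_sym:.3f}")
    print(f"Mean width (symmetry=False): {width_asym:.3f}")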
From e319da2b14c8afc463846e5e2ad87eacf232652e Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 14:13:29 +0200 Subject: [PATCH 114/128] FIX: issues of documentation with bullet points Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- doc/theoretical_description_metrics.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index f9adfbe9a..0ef73e480 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -126,6 +126,7 @@ The **Expected Calibration Error** (ECE) is a metric used to evaluate how well t \text{ECE} = \sum_{i=1}^B \frac{|B_i|}{n} \left| \text{acc}(B_i) - \text{conf}(B_i) \right| where: + - :math:`B_i` is the set of indices of samples that fall into the i-th bin. - :math:`|B_i|` is the number of samples in the i-th bin. - :math:`n` is the total number of samples. @@ -146,6 +147,7 @@ The Top-Label ECE is calculated as follows: \text{Top-Label ECE} = \frac{1}{L} \sum_{j=1}^L \sum_{i=1}^B \frac{|B_{i,j}|}{n_j} \left| \text{acc}(B_{i,j}) - \text{conf}(B_{i,j}) \right| where: + - :math:`L` is the number of unique labels. - :math:`B_{i,j}` is the set of indices of samples that fall into the i-th bin for label j. - :math:`|B_{i,j}|` is the number of samples in the i-th bin for label j. From 6edf468cd41d79066c31ff2c7693ea7cd31a7a34 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Tue, 21 May 2024 14:55:07 +0200 Subject: [PATCH 115/128] Update maxdepth to 0 in index.rst --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index 53172ca43..7bc75bbf7 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -59,7 +59,7 @@ notebooks_calibration .. toctree:: - :maxdepth: 1 + :maxdepth: 0 :hidden: :caption: METRICS From 422de43de2b4c4b5f9ca50eb32fb6fe5bb0722fa Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 22 May 2024 15:13:59 +0200 Subject: [PATCH 116/128] FIX: reset correct maxdepth --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index 7bc75bbf7..b5450722b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -59,7 +59,7 @@ notebooks_calibration .. toctree:: - :maxdepth: 0 + :maxdepth: 2 :hidden: :caption: METRICS From 488a7b4f4587eb707eaba8e9d7612e068d704c34 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 22 May 2024 15:14:15 +0200 Subject: [PATCH 117/128] FIX: headers showing in sidebar --- doc/theoretical_description_metrics.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 0ef73e480..26b4fa1c4 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -8,7 +8,7 @@ Theoretical Description This document provides detailed descriptions of various metrics used to evaluate the performance of predictive models, particularly focusing on their ability to estimate uncertainties and calibrate predictions accurately. 1. General Metrics -================== +------------------ Regression Coverage Score ------------------------- @@ -115,7 +115,7 @@ The **Mean Winkler Interval (MWI) Score** evaluates prediction intervals by comb where :math:`\hat y^{\text{boundary}}_{i}` is the nearest interval boundary not containing :math:`y_{i}`, and :math:`\alpha` is the significance level. 2. 
Calibration Metrics -====================== +---------------------- Expected Calibration Error -------------------------- @@ -201,8 +201,8 @@ Spiegelhalter’s Test \text{Spiegelhalter's Statistic} = \frac{\sum_{i=1}^n (y_i - \hat y_i)(1 - 2\hat y_i)}{\sqrt{\sum_{i=1}^n (1 - 2 \hat y_i)^2 \hat y_i (1 - \hat y_i)}} -References -========== +3. References +------------- [1] Angelopoulos, A. N., & Bates, S. (2021). A gentle introduction to conformal prediction and From a43eb63a9ae4cb8476916ed26a5606eefa1b7357 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 22 May 2024 15:36:44 +0200 Subject: [PATCH 118/128] FIX: name of plot description --- .../regression/1-quickstart/plot_cqr_symmetry_difference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py index 4455c27dd..77271997c 100644 --- a/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py +++ b/examples/regression/1-quickstart/plot_cqr_symmetry_difference.py @@ -1,7 +1,7 @@ """ -====================================================== -Plotting MAPIE Quantile Regressor prediction intervals -====================================================== +==================================== +Plotting CQR with symmetric argument +==================================== An example plot of :class:`~mapie.quantile_regression.MapieQuantileRegressor` illustrating the impact of the symmetry parameter. """ From 668b555da3339e93e6188f8541feadc7bc9c31cf Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 22 May 2024 16:23:31 +0200 Subject: [PATCH 119/128] FIX: add all metrics of calibration in the same spot --- doc/theoretical_description_calibration.rst | 117 ++----------------- doc/theoretical_description_metrics.rst | 123 +++++++++++++++----- 2 files changed, 98 insertions(+), 142 deletions(-) diff --git a/doc/theoretical_description_calibration.rst b/doc/theoretical_description_calibration.rst index 21df15f2d..c62540337 100644 --- a/doc/theoretical_description_calibration.rst +++ b/doc/theoretical_description_calibration.rst @@ -2,10 +2,9 @@ .. _theoretical_description_calibration: -======================= +####################### Theoretical Description -======================= - +####################### One method for multi-class calibration has been implemented in MAPIE so far : Top-Label Calibration [1]. @@ -34,8 +33,8 @@ To apply calibration directly to a multi-class context, Gupta et al. propose a f a multi-class calibration to multiple binary calibrations (M2B). -1. Top-Label ------------- +Top-Label +--------- Top-Label calibration is a calibration technique introduced by Gupta et al. to calibrate the model according to the highest score and the corresponding class (see [1] Section 2). This framework offers to apply binary calibration techniques to multi-class calibration. @@ -50,109 +49,8 @@ according to Top-Label calibration if: Pr(Y = c(X) \mid h(X), c(X)) = h(X) -2. Metrics for calibration --------------------------- - -**Expected calibration error** - -The main metric to check if the calibration is correct is the Expected Calibration Error (ECE). It is based on two -components, accuracy and confidence per bin. The number of bins is a hyperparamater :math:`M`, and we refer to a specific bin by -:math:`B_m`. - -.. 
math:: - \text{acc}(B_m) &= \frac{1}{\left| B_m \right|} \sum_{i \in B_m} {y}_i \\ - \text{conf}(B_m) &= \frac{1}{\left| B_m \right|} \sum_{i \in B_m} \hat{f}(x)_i - - -The ECE is the combination of these two metrics combined. - -.. math:: - \text{ECE} = \sum_{m=1}^M \frac{\left| B_m \right|}{n} \left| acc(B_m) - conf(B_m) \right| - -In simple terms, once all the different bins from the confidence scores have been created, we check the mean accuracy of each bin. -The absolute mean difference between the two is the ECE. Hence, the lower the ECE, the better the calibration was performed. - -**Top-Label ECE** - -In the top-label calibration, we only calculate the ECE for the top-label class. Hence, per top-label class, we condition the calculation -of the accuracy and confidence based on the top label and take the average ECE for each top-label. - -3. Statistical tests for calibration ------------------------------------- - -**Kolmogorov-Smirnov test** - -Kolmogorov-Smirnov test was derived in [2, 3, 4]. The idea is to consider the cumulative differences between sorted scores :math:`s_i` -and their corresponding labels :math:`y_i` and to compare its properties to that of a standard Brownian motion. Let us consider the -cumulative differences on sorted scores: - -.. math:: - C_k = \frac{1}{N}\sum_{i=1}^k (s_i - y_i) - -We also introduce a typical normalization scale :math:`\sigma`: - -.. math:: - \sigma = \frac{1}{N}\sqrt{\sum_{i=1}^N s_i(1 - s_i)} - -The Kolmogorov-Smirnov statistic is then defined as : - -.. math:: - G = \max|C_k|/\sigma - -It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) -converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form -formulas for the cumulative distribution function (CDF) of the maximum absolute value of such a standard Brownian motion. -So we state the p-value associated to the statistical test of well calibration as: - -.. math:: - p = 1 - CDF(G) - -**Kuiper test** - -Kuiper test was derived in [2, 3, 4] and is very similar to Kolmogorov-Smirnov. This time, the statistic is defined as: - -.. math:: - H = (\max_k|C_k| - \min_k|C_k|)/\sigma - -It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) -converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 4] also provide closed-form -formulas for the cumulative distribution function (CDF) of the range of such a standard Brownian motion. -So we state the p-value associated to the statistical test of well calibration as: - -.. math:: - p = 1 - CDF(H) - -**Spiegelhalter test** - -Spiegelhalter test was derived in [6]. It is based on a decomposition of the Brier score: - -.. math:: - B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)^2 - -where scores are denoted :math:`s_i` and their corresponding labels :math:`y_i`. This can be decomposed in two terms: - -.. math:: - B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)(1 - 2s_i) + \frac{1}{N}\sum_{i=1}^N s_i(1 - s_i) - -It can be shown that the first term has an expected value of zero under the null hypothesis of well calibration. So we interpret -the second term as the Brier score expected value :math:`E(B)` under the null hypothesis. As for the variance of the Brier score, it can be -computed as: - -.. 
math:: - Var(B) = \frac{1}{N^2}\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i) - -So we can build a Z-score as follows: - -.. math:: - Z = \frac{B - E(B)}{\sqrt{Var(B)}} = \frac{\sum_{i=1}^N(y_i - s_i)(1 - 2s_i)}{\sqrt{\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i)}} - -This statistic follows a normal distribution of cumulative distribution CDF so that we state the associated p-value: - -.. math:: - p = 1 - CDF(Z) - -3. References -------------- +References +---------- [1] Gupta, Chirag, and Aaditya K. Ramdas. "Top-label calibration and multiclass-to-binary reductions." @@ -171,8 +69,7 @@ arXiv preprint arXiv:2202.00100. [4] D. A. Darling. A. J. F. Siegert. The First Passage Problem for a Continuous Markov Process. -Ann. Math. Statist. 24 (4) 624 - 639, December, -1953. +Ann. Math. Statist. 24 (4) 624 - 639, December, 1953. [5] William Feller. The Asymptotic Distribution of the Range of Sums of diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 26b4fa1c4..cbe074141 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -2,13 +2,14 @@ .. _theoretical_description_metrics: +####################### Theoretical Description -======================= +####################### This document provides detailed descriptions of various metrics used to evaluate the performance of predictive models, particularly focusing on their ability to estimate uncertainties and calibrate predictions accurately. 1. General Metrics ------------------- +================== Regression Coverage Score ------------------------- @@ -115,45 +116,57 @@ The **Mean Winkler Interval (MWI) Score** evaluates prediction intervals by comb where :math:`\hat y^{\text{boundary}}_{i}` is the nearest interval boundary not containing :math:`y_{i}`, and :math:`\alpha` is the significance level. 2. Calibration Metrics ----------------------- +====================== + Expected Calibration Error --------------------------- +========================== + +The **Expected Calibration Error** (ECE) is a metric used to evaluate how well the predicted probabilities of a model align with the actual outcomes. It measures the difference between predicted confidence levels and actual accuracy. The process involves dividing the predictions into bins based on confidence scores and then comparing the accuracy within each bin to the average confidence level of the predictions in that bin. The number of bins is a hyperparameter :math:`M`, and we refer to a specific bin by :math:`B_m`. -The **Expected Calibration Error** (ECE) is a metric used to evaluate how well the predicted probabilities of a model align with the actual outcomes. The ECE provides a measure of the difference between predicted confidence levels and actual accuracy. The idea is to divide the predictions into bins based on confidence scores and then compare the accuracy within each bin to the average confidence level of the predictions in that bin. +For each bin :math:`B_m`, the accuracy and confidence are defined as follows: .. math:: - \text{ECE} = \sum_{i=1}^B \frac{|B_i|}{n} \left| \text{acc}(B_i) - \text{conf}(B_i) \right| -where: + \text{acc}(B_m) = \frac{1}{\left| B_m \right|} \sum_{i \in B_m} y_i + +.. math:: + + \text{conf}(B_m) = \frac{1}{\left| B_m \right|} \sum_{i \in B_m} \hat{f}(x_i) + +The ECE is then calculated using the following formula: + +.. math:: -- :math:`B_i` is the set of indices of samples that fall into the i-th bin. -- :math:`|B_i|` is the number of samples in the i-th bin. 
+ \text{ECE} = \sum_{m=1}^M \frac{\left| B_m \right|}{n} \left| \text{acc}(B_m) - \text{conf}(B_m) \right| + +where: +- :math:`B_m` is the set of indices of samples that fall into the :math:`m`-th bin. +- :math:`\left| B_m \right|` is the number of samples in the :math:`m`-th bin. - :math:`n` is the total number of samples. -- :math:`\text{acc}(B_i)` is the accuracy within the i-th bin. -- :math:`\text{conf}(B_i)` is the average confidence score within the i-th bin. -- :math:`B` is the total number of bins. +- :math:`\text{acc}(B_m)` is the accuracy within the :math:`m`-th bin. +- :math:`\text{conf}(B_m)` is the average confidence score within the :math:`m`-th bin. -The difference between the average confidence and the actual accuracy within each bin is weighted by the proportion of samples in that bin, ensuring that bins with more samples have a larger influence on the final ECE value. +In simple terms, once the different bins from the confidence scores have been created, we check the mean accuracy of each bin. The absolute mean difference between the two is the ECE. Hence, the lower the ECE, the better the calibration was performed. The difference between the average confidence and the actual accuracy within each bin is weighted by the proportion of samples in that bin, ensuring that bins with more samples have a larger influence on the final ECE value. Top-Label Expected Calibration Error (Top-Label ECE) ----------------------------------------------------- +==================================================== -The **Top-Label Expected Calibration Error** (Top-Label ECE) extends the concept of ECE to the multi-class setting. Instead of evaluating calibration over all predicted probabilities, Top-Label ECE focuses on the calibration of the most confident prediction (top-label) for each sample. +The **Top-Label Expected Calibration Error** (Top-Label ECE) extends the concept of ECE to the multi-class setting. Instead of evaluating calibration over all predicted probabilities, Top-Label ECE focuses on the calibration of the most confident prediction (top-label) for each sample. For the top-label class, the calculation of the accuracy and confidence is conditioned on the top label, and the average ECE is taken for each top-label. The Top-Label ECE is calculated as follows: .. math:: + \text{Top-Label ECE} = \frac{1}{L} \sum_{j=1}^L \sum_{i=1}^B \frac{|B_{i,j}|}{n_j} \left| \text{acc}(B_{i,j}) - \text{conf}(B_{i,j}) \right| where: - - :math:`L` is the number of unique labels. -- :math:`B_{i,j}` is the set of indices of samples that fall into the i-th bin for label j. -- :math:`|B_{i,j}|` is the number of samples in the i-th bin for label j. -- :math:`n_j` is the total number of samples for label j. -- :math:`\text{acc}(B_{i,j})` is the accuracy within the i-th bin for label j. -- :math:`\text{conf}(B_{i,j})` is the average confidence score within the i-th bin for label j. +- :math:`B_{i,j}` is the set of indices of samples that fall into the :math:`i`-th bin for label :math:`j`. +- :math:`\left| B_{i,j} \right|` is the number of samples in the :math:`i`-th bin for label :math:`j`. +- :math:`n_j` is the total number of samples for label :math:`j`. +- :math:`\text{acc}(B_{i,j})` is the accuracy within the :math:`i`-th bin for label :math:`j`. +- :math:`\text{conf}(B_{i,j})` is the average confidence score within the :math:`i`-th bin for label :math:`j`. - :math:`B` is the total number of bins. For each label, the predictions are binned according to their confidence scores for that label. 
The calibration error is then calculated for each label separately and averaged across all labels to obtain the final Top-Label ECE value. This ensures that the calibration is measured specifically for the most confident prediction, which is often the most critical for decision-making in multi-class problems. @@ -175,34 +188,80 @@ where: Kolmogorov-Smirnov Statistic for Calibration -------------------------------------------- -This statistic measures the maximum absolute deviation between the empirical cumulative distribution function (ECDF) of observed outcomes and predicted probabilities [2, 3, 11]. +The **Kolmogorov-Smirnov test** was derived in [2, 3, 11]. The idea is to consider the cumulative differences between sorted scores :math:`s_i` +and their corresponding labels :math:`y_i` and to compare its properties to that of a standard Brownian motion. Let us consider the +cumulative differences on sorted scores: + +.. math:: + C_k = \frac{1}{N}\sum_{i=1}^k (s_i - y_i) + +We also introduce a typical normalization scale :math:`\sigma`: + +.. math:: + \sigma = \frac{1}{N}\sqrt{\sum_{i=1}^N s_i(1 - s_i)} + +The Kolmogorov-Smirnov statistic is then defined as : .. math:: + G = \max|C_k|/\sigma - \text{KS Statistic} = \sup_x |F_n(x) - S_n(x)| +It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) +converges to the maximum absolute value of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 11] also provide closed-form +formulas for the cumulative distribution function (CDF) of the maximum absolute value of such a standard Brownian motion. +So we state the p-value associated to the statistical test of well calibration as: -where :math:`F_n(x)` is the ECDF of the predicted probabilities and :math:`S_n(x)` is the ECDF of the observed outcomes. +.. math:: + p = 1 - CDF(G) -Kuiper's Statistic ------------------- +Kuiper's Test +------------- -**Kuiper's Statistic** considers both the maximum deviation above and below the mean cumulative difference, making it more sensitive to deviations at the tails of the distribution [2, 3, 11]. +The **Kuiper test** was derived in [2, 3, 11] and is very similar to Kolmogorov-Smirnov. This time, the statistic is defined as: .. math:: + H = (\max_k|C_k| - \min_k|C_k|)/\sigma + +It can be shown [2] that, under the null hypothesis of well-calibrated scores, this quantity asymptotically (i.e. when N goes to infinity) +converges to the range of a standard Brownian motion over the unit interval :math:`[0, 1]`. [3, 11] also provide closed-form +formulas for the cumulative distribution function (CDF) of the range of such a standard Brownian motion. +So we state the p-value associated to the statistical test of well calibration as: - \text{Kuiper's Statistic} = \max(F_n(x) - S_n(x)) + \max(S_n(x) - F_n(x)) +.. math:: + p = 1 - CDF(H) Spiegelhalter’s Test -------------------- -**Spiegelhalter’s Test** assesses the calibration of binary predictions based on a transformation of the Brier score [9]. +The **Spiegelhalter test** was derived in [9]. It is based on a decomposition of the Brier score: .. math:: + B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)^2 - \text{Spiegelhalter's Statistic} = \frac{\sum_{i=1}^n (y_i - \hat y_i)(1 - 2\hat y_i)}{\sqrt{\sum_{i=1}^n (1 - 2 \hat y_i)^2 \hat y_i (1 - \hat y_i)}} +where scores are denoted :math:`s_i` and their corresponding labels :math:`y_i`. This can be decomposed in two terms: -3. References -------------- +.. 
math:: + B = \frac{1}{N}\sum_{i=1}^N(y_i - s_i)(1 - 2s_i) + \frac{1}{N}\sum_{i=1}^N s_i(1 - s_i) + +It can be shown that the first term has an expected value of zero under the null hypothesis of well calibration. So we interpret +the second term as the Brier score expected value :math:`E(B)` under the null hypothesis. As for the variance of the Brier score, it can be +computed as: + +.. math:: + Var(B) = \frac{1}{N^2}\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i) + +So we can build a Z-score as follows: + +.. math:: + Z = \frac{B - E(B)}{\sqrt{Var(B)}} = \frac{\sum_{i=1}^N(y_i - s_i)(1 - 2s_i)}{\sqrt{\sum_{i=1}^N(1 - 2s_i)^2 s_i(1 - s_i)}} + +This statistic follows a normal distribution of cumulative distribution CDF so that we state the associated p-value: + +.. math:: + p = 1 - CDF(Z) + + +References +========== [1] Angelopoulos, A. N., & Bates, S. (2021). A gentle introduction to conformal prediction and From 9b458bad8a819691e329b171b6ce72359f81044c Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Wed, 22 May 2024 16:24:01 +0200 Subject: [PATCH 120/128] FIX: header and labels correction --- doc/quick_start.rst | 2 +- ...oretical_description_binary_classification.rst | 8 ++++---- doc/theoretical_description_classification.rst | 9 ++++----- doc/theoretical_description_conformity_scores.rst | 14 +++++++------- ...ical_description_multilabel_classification.rst | 15 +++++++-------- doc/theoretical_description_regression.rst | 6 +++--- 6 files changed, 26 insertions(+), 28 deletions(-) diff --git a/doc/quick_start.rst b/doc/quick_start.rst index 31e2efa97..dcdf6700e 100644 --- a/doc/quick_start.rst +++ b/doc/quick_start.rst @@ -114,7 +114,7 @@ It is given by the alpha parameter defined in ``MapieRegressor``, here equal to thus giving target coverages of ``0.95`` and ``0.68``. The effective coverage is the actual fraction of true labels lying in the prediction intervals. -2. Run MapieClassifier +3. Run MapieClassifier ---------------------- Similarly, it's possible to do the same for a basic classification problem. diff --git a/doc/theoretical_description_binary_classification.rst b/doc/theoretical_description_binary_classification.rst index 55e2f6144..9c8f6f336 100644 --- a/doc/theoretical_description_binary_classification.rst +++ b/doc/theoretical_description_binary_classification.rst @@ -2,9 +2,9 @@ .. _theoretical_description_binay_classification: -======================= +####################### Theoretical Description -======================= +####################### There are mainly three different ways to handle uncertainty quantification in binary classification: calibration (see :doc:`theoretical_description_calibration`), confidence interval (CI) for the probability @@ -83,8 +83,8 @@ for the labels of test objects which are guaranteed to be well-calibrated under that the observations are generated independently from the same distribution [2]. -4. References -------------- +References +---------- [1] Gupta, Chirag, Aleksandr Podkopaev, and Aaditya Ramdas. "Distribution-free binary classification: prediction sets, confidence intervals, and calibration." diff --git a/doc/theoretical_description_classification.rst b/doc/theoretical_description_classification.rst index a8ef17830..445fcfe42 100644 --- a/doc/theoretical_description_classification.rst +++ b/doc/theoretical_description_classification.rst @@ -2,10 +2,9 @@ .. 
_theoretical_description_classification: -======================= +####################### Theoretical Description -======================= - +####################### Three methods for multi-class uncertainty quantification have been implemented in MAPIE so far : LAC (that stands for Least Ambiguous set-valued Classifier) [1], Adaptive Prediction Sets [2, 3] and Top-K [3]. @@ -229,8 +228,8 @@ where : .. TO BE CONTINUED -5. References -------------- +References +---------- [1] Mauricio Sadinle, Jing Lei, & Larry Wasserman. "Least Ambiguous Set-Valued Classifiers With Bounded Error Levels." diff --git a/doc/theoretical_description_conformity_scores.rst b/doc/theoretical_description_conformity_scores.rst index 8ea72b6ff..5ec0aee4d 100644 --- a/doc/theoretical_description_conformity_scores.rst +++ b/doc/theoretical_description_conformity_scores.rst @@ -2,9 +2,9 @@ .. _theoretical_description_conformity_scores: -============================================= +############################################# Theoretical Description for Conformity Scores -============================================= +############################################# The :class:`mapie.conformity_scores.ConformityScore` class implements various methods to compute conformity scores for regression. @@ -25,7 +25,7 @@ quantiles will be computed : one on the right side of the distribution and the other on the left side. 1. The absolute residual score -============================== +------------------------------ The absolute residual score (:class:`mapie.conformity_scores.AbsoluteConformityScore`) is the simplest and most commonly used conformal score, it translates the error @@ -44,7 +44,7 @@ With this score, the intervals of predictions will be constant over the whole da This score is by default symmetric (*see above for definition*). 2. The gamma score -================== +------------------ The gamma score [2] (:class:`mapie.conformity_scores.GammaConformityScore`) adds a notion of adaptivity with the normalization of the residuals by the predictions. @@ -69,7 +69,7 @@ the order of magnitude of the predictions, implying that this score should be us in use cases where we want greater uncertainty when the prediction is high. 3. The residual normalized score -======================================= +-------------------------------- The residual normalized score [1] (:class:`mapie.conformity_scores.ResidualNormalisedScore`) is slightly more complex than the previous scores. @@ -97,7 +97,7 @@ it is not proportional to the uncertainty. Key takeaways -============= +------------- - The absolute residual score is the basic conformity score and gives constant intervals. It is the one used by default by :class:`mapie.regression.MapieRegressor`. - The gamma conformity score adds a notion of adaptivity by giving intervals of different sizes @@ -107,7 +107,7 @@ Key takeaways without specific assumptions on the data. References -========== +---------- [1] Lei, J., G'Sell, M., Rinaldo, A., Tibshirani, R. J., & Wasserman, L. (2018). Distribution-Free Predictive Inference for Regression. Journal of the American Statistical Association, 113(523), 1094–1111. diff --git a/doc/theoretical_description_multilabel_classification.rst b/doc/theoretical_description_multilabel_classification.rst index 011061e00..8dffb0b39 100644 --- a/doc/theoretical_description_multilabel_classification.rst +++ b/doc/theoretical_description_multilabel_classification.rst @@ -2,10 +2,9 @@ .. 
diff --git a/doc/theoretical_description_multilabel_classification.rst b/doc/theoretical_description_multilabel_classification.rst
index 011061e00..8dffb0b39 100644
--- a/doc/theoretical_description_multilabel_classification.rst
+++ b/doc/theoretical_description_multilabel_classification.rst
@@ -2,10 +2,9 @@
 .. _theoretical_description_multilabel_classification:
 
-=======================
+#######################
 Theoretical Description
-=======================
-
+#######################
 
 Three methods for multi-label uncertainty quantification have been implemented in MAPIE so far :
 Risk-Controlling Prediction Sets (RCPS) [1], Conformal Risk Control (CRC) [2] and Learn Then Test (LTT) [3].
@@ -38,7 +37,7 @@ Notice that at the opposite of the other two methods, LTT allows to control any
 we use CRC and RCPS for recall control and LTT for precision control.
 
 1. Risk-Controlling Prediction Sets
------------------------------------
+===================================
 
 1.1. General settings
 ---------------------
@@ -143,7 +142,7 @@ Then:
 
 2. Conformal Risk Control
--------------------------
+=========================
 
 The goal of this method is to control any monotone and bounded loss.
 The result of this method can be expressed as follows:
@@ -166,7 +165,7 @@ With :
 
 3. Learn Then Test
-------------------
+==================
 
 3.1. General settings
 ---------------------
 
 We are going to present the Learn Then Test framework that allows the user to control
 non-monotonic risk such as precision score.
 This method has been introduced in article [3].
@@ -200,8 +199,8 @@ In order to find all the parameters :math:`\lambda` that satisfy the above condi
 that controls the family-wise error rate (FWER), for example, Bonferonni correction.
 
-4. References
--------------
+References
+==========
 
 [1] Lihua Lei Jitendra Malik Stephen Bates, Anastasios Angelopoulos, and Michael I. Jordan.
 Distribution-free, risk-controlling prediction
diff --git a/doc/theoretical_description_regression.rst b/doc/theoretical_description_regression.rst
index c755975df..8f60c030c 100644
--- a/doc/theoretical_description_regression.rst
+++ b/doc/theoretical_description_regression.rst
@@ -2,9 +2,9 @@
 .. _theoretical_description_regression:
 
-=======================
+#######################
 Theoretical Description
-=======================
+#######################
 
 The :class:`mapie.regression.MapieRegressor` class uses various
 resampling methods based on the jackknife strategy
@@ -58,7 +58,7 @@ The figure below illustrates the naive method.
    :align: center
 
 2. The split method
-=====================
+===================
 
 The so-called split method computes the residuals of a calibration dataset
 to estimate the typical error obtained on a new test data point.

From 75716ce3a39c047db81d4e78f83ed50c6a62d051 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Thu, 23 May 2024 11:31:54 +0200
Subject: [PATCH 121/128] FIX: indentation of headers

---
 doc/quick_start.rst                                       | 8 +++-----
 doc/theoretical_description_metrics.rst                   | 4 ++--
 doc/theoretical_description_multilabel_classification.rst | 2 --
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/doc/quick_start.rst b/doc/quick_start.rst
index dcdf6700e..3754f5ff5 100644
--- a/doc/quick_start.rst
+++ b/doc/quick_start.rst
@@ -7,11 +7,9 @@ In regression settings, **MAPIE** provides prediction intervals on single-output
 In classification settings, **MAPIE** provides prediction sets on multi-class data.
 In any case, **MAPIE** is compatible with any scikit-learn-compatible estimator.
 
-Estimate your prediction intervals
-==================================
 
 1. Download and install the module
-----------------------------------
+==================================
 
 Install via ``pip``:
 
@@ -33,7 +31,7 @@ To install directly from the github repository :
 
 2. Run MapieRegressor
----------------------
+=====================
 
 Let us start with a basic regression problem.
 Here, we generate one-dimensional noisy data that we fit with a linear model.
@@ -115,7 +113,7 @@ thus giving target coverages of ``0.95`` and ``0.68``.
 The effective coverage is the actual fraction of true labels lying in the
 prediction intervals.
 
 3. Run MapieClassifier
-----------------------
+=======================
 
 Similarly, it's possible to do the same for a basic classification problem.
diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst
index cbe074141..6ae010bb3 100644
--- a/doc/theoretical_description_metrics.rst
+++ b/doc/theoretical_description_metrics.rst
@@ -120,7 +120,7 @@ where :math:`\hat y^{\text{boundary}}_{i}` is the nearest interval boundary not
 
 Expected Calibration Error
-==========================
+--------------------------
 
 The **Expected Calibration Error** (ECE) is a metric used to evaluate how well the predicted probabilities of a model align with the actual outcomes. It measures the difference between predicted confidence levels and actual accuracy. The process involves dividing the predictions into bins based on confidence scores and then comparing the accuracy within each bin to the average confidence level of the predictions in that bin. The number of bins is a hyperparameter :math:`M`, and we refer to a specific bin by :math:`B_m`.
@@ -150,7 +150,7 @@ where:
 In simple terms, once the different bins from the confidence scores have been created, we check the mean accuracy of each bin. The absolute mean difference between the two is the ECE. Hence, the lower the ECE, the better the calibration was performed. The difference between the average confidence and the actual accuracy within each bin is weighted by the proportion of samples in that bin, ensuring that bins with more samples have a larger influence on the final ECE value.
 
 Top-Label Expected Calibration Error (Top-Label ECE)
-====================================================
+----------------------------------------------------
 
 The **Top-Label Expected Calibration Error** (Top-Label ECE) extends the concept of ECE to the multi-class setting. Instead of evaluating calibration over all predicted probabilities, Top-Label ECE focuses on the calibration of the most confident prediction (top-label) for each sample. For the top-label class, the calculation of the accuracy and confidence is conditioned on the top label, and the average ECE is taken for each top-label.
diff --git a/doc/theoretical_description_multilabel_classification.rst b/doc/theoretical_description_multilabel_classification.rst
index 8dffb0b39..e3ff05da3 100644
--- a/doc/theoretical_description_multilabel_classification.rst
+++ b/doc/theoretical_description_multilabel_classification.rst
@@ -167,8 +167,6 @@ With :
 
 3. Learn Then Test
 ==================
 
-3.1. General settings
----------------------
 
 We are going to present the Learn Then Test framework that allows the user to control
 non-monotonic risk such as precision score.
 This method has been introduced in article [3].
 The settings here are the same as RCPS and CRC,
 we just need to introduce some new parameters:
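[Editorial aside, not part of the patch: the LTT recipe described above can be made concrete with a schematic NumPy sketch. For each candidate :math:`\lambda` we form a p-value for the hypothesis that the true risk exceeds the target, then apply a Bonferroni correction to control the FWER. The Hoeffding-style p-value is one common instantiation and an assumption on our part, as is every name in the snippet.]

.. code-block:: python

    import numpy as np

    def ltt_valid_lambdas(risks, n, alpha=0.1, delta=0.1):
        """Schematic LTT: risks[j] = empirical risk at lambda_j on n calibration points.

        H0_j: the true risk at lambda_j exceeds alpha. We compute a
        Hoeffding-style p-value for each H0_j, then keep the lambdas whose
        Bonferroni-corrected p-value is below delta (FWER control).
        """
        risks = np.asarray(risks)
        gaps = np.maximum(alpha - risks, 0.0)
        p_values = np.exp(-2 * n * gaps ** 2)  # p_j = exp(-2 n (alpha - R_j)_+^2)
        # Bonferroni: reject H0_j when p_j <= delta / |Lambda|
        return np.where(p_values <= delta / len(risks))[0]

    # Toy grid where the empirical risk decreases as lambda grows.
    lambdas = np.linspace(0, 1, 21)
    risks = 0.3 * (1 - lambdas)
    print(lambdas[ltt_valid_lambdas(risks, n=1000)])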
From 70bb4b1ff832b327305dd0792472c42a10ececea Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Thu, 23 May 2024 11:32:15 +0200
Subject: [PATCH 122/128] FIX: standardization

---
 .../plot_main-tutorial-regression.py | 41 ++++++++-----------
 1 file changed, 17 insertions(+), 24 deletions(-)

diff --git a/examples/regression/4-tutorials/plot_main-tutorial-regression.py b/examples/regression/4-tutorials/plot_main-tutorial-regression.py
index 50c2fd48d..33a324f8b 100644
--- a/examples/regression/4-tutorials/plot_main-tutorial-regression.py
+++ b/examples/regression/4-tutorials/plot_main-tutorial-regression.py
@@ -2,31 +2,24 @@
 ===============================
 Tutorial for tabular regression
 ===============================
-"""
-##############################################################################
-# In this tutorial, we compare the prediction intervals estimated by MAPIE on a
-# simple, one-dimensional, ground truth function
-# :math:`f(x) = x \times \sin(x)`.
-#
-# Throughout this tutorial, we will answer the following questions:
-#
-# - How well do the MAPIE strategies capture the aleatoric uncertainty
-#   existing in the data?
-#
-# - How do the prediction intervals estimated by the resampling strategies
-#   evolve for new *out-of-distribution* data ?
-#
-# - How do the prediction intervals vary between regressor models ?
-#
-# Throughout this tutorial, we estimate the prediction intervals first using
-# a polynomial function, and then using a boosting model, and a simple neural
-# network.
-#
-# **For practical problems, we advise using the faster CV+ or
-# Jackknife+-after-Bootstrap strategies.
-# For conservative prediction interval estimates, you can alternatively
-# use the CV-minmax strategies.**
+In this tutorial, we compare the prediction intervals estimated by MAPIE on a
+simple, one-dimensional, ground truth function
+:math:`f(x) = x \times \sin(x)`.
+Throughout this tutorial, we will answer the following questions:
+- How well do the MAPIE strategies capture the aleatoric uncertainty
+  existing in the data?
+- How do the prediction intervals estimated by the resampling strategies
+  evolve for new *out-of-distribution* data ?
+- How do the prediction intervals vary between regressor models ?
+Throughout this tutorial, we estimate the prediction intervals first using
+a polynomial function, and then using a boosting model, and a simple neural
+network.
+**For practical problems, we advise using the faster CV+ or
+Jackknife+-after-Bootstrap strategies.
+For conservative prediction interval estimates, you can alternatively
+use the CV-minmax strategies.**
+"""
 
 import os
 import warnings
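[Editorial aside, not part of the patch: the strategies named in this docstring map onto ``MapieRegressor`` options. A minimal sketch, assuming the ``method``/``cv`` combinations below match your installed MAPIE version:]

.. code-block:: python

    from sklearn.linear_model import LinearRegression
    from mapie.regression import MapieRegressor
    from mapie.subsample import Subsample

    # CV+, Jackknife+-after-Bootstrap and CV-minmax differ only in method/cv:
    STRATEGIES = {
        "cv_plus": dict(method="plus", cv=10),
        "jackknife_plus_ab": dict(method="plus", cv=Subsample(n_resamplings=50)),
        "cv_minmax": dict(method="minmax", cv=10),  # most conservative intervals
    }

    def build(strategy):
        """Return a MapieRegressor configured for the requested strategy."""
        return MapieRegressor(LinearRegression(), **STRATEGIES[strategy])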
From 209016372b9b04b4e59ee415f98b38369b85b503 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Thu, 23 May 2024 11:32:26 +0200
Subject: [PATCH 123/128] FIX: no references in tutorials

---
 .../4-tutorials/plot_ts-tutorial.py | 24 ++++---------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/examples/regression/4-tutorials/plot_ts-tutorial.py b/examples/regression/4-tutorials/plot_ts-tutorial.py
index 24914c068..13dde284e 100644
--- a/examples/regression/4-tutorials/plot_ts-tutorial.py
+++ b/examples/regression/4-tutorials/plot_ts-tutorial.py
@@ -21,14 +21,14 @@
 Once the base model is optimized, we can use
 :class:`~MapieTimeSeriesRegressor` to estimate
 the prediction intervals associated with one-step ahead forecasts through
-the EnbPI method [1].
+the EnbPI method.
 
 As its parent class :class:`~MapieRegressor`,
 :class:`~MapieTimeSeriesRegressor` has two main arguments : "cv", and "method".
 In order to implement EnbPI, "method" must be set to "enbpi" (the default value)
 while "cv" must be set to the :class:`~mapie.subsample.BlockBootstrap`
 class that block bootstraps the training set.
-This sampling method is used in [1] instead of the traditional bootstrap
+This sampling method is used instead of the traditional bootstrap
 strategy as it is more suited for time series data.
 
 The EnbPI method allows you update the residuals during the prediction,
@@ -38,26 +38,12 @@ class that block bootstraps the training set.
 the ``partial_fit`` class method called at every step.
 
-The ACI [2] strategy allows you to adapt the conformal inference
+The ACI strategy allows you to adapt the conformal inference
 (i.e the quantile). If the real values are not in the coverage,
 the size of the intervals will grow. Conversely, if the real values are in
 the coverage, the size of the intervals will decrease. You can use a gamma
 coefficient to adjust the strength of the correction.
 
-References
-----------
-[1] Chen Xu and Yao Xie.
-“Conformal Prediction Interval for Dynamic Time-Series.”
-International Conference on Machine Learning (ICML, 2021).
-
-[2] Isaac Gibbs, Emmanuel Candes
-"Adaptive conformal inference under distribution shift"
-Advances in Neural Information Processing Systems, (NeurIPS, 2021).
-
-[3] Margaux Zaffran et al.
-"Adaptive Conformal Predictions for Time Series"
-https://arxiv.org/pdf/2202.07282.pdf
 """
 
 import warnings
@@ -180,7 +166,7 @@ class that block bootstraps the training set.
 #
 # We now use :class:`~MapieTimeSeriesRegressor` to build prediction intervals
 # associated with one-step ahead forecasts. As explained in the introduction,
-# we use the EnbPI method [1] and the ACI method [2] .
+# we use the EnbPI method and the ACI method.
 #
 # Estimating prediction intervals can be possible in three ways:
 #
@@ -199,7 +185,7 @@ class that block bootstraps the training set.
 # sudden change points on test sets that have not been seen by the model
 # during training.
 #
-# Following [1], we use the :class:`~BlockBootstrap` sampling
+# We use the :class:`~BlockBootstrap` sampling
 # method instead of the traditional bootstrap strategy for training the model
 # since the former is more suited for time series data.
 # Here, we choose to perform 10 resamplings with 10 blocks.
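[Editorial aside, not part of the patch: the EnbPI + ``partial_fit`` workflow described in this tutorial looks roughly like the sketch below. The synthetic data and parameter values are our own assumptions; check the signatures against your installed MAPIE version.]

.. code-block:: python

    import numpy as np
    from sklearn.ensemble import RandomForestRegressor
    from mapie.time_series_regression import MapieTimeSeriesRegressor
    from mapie.subsample import BlockBootstrap

    rng = np.random.default_rng(42)
    X = np.arange(300, dtype=float).reshape(-1, 1)
    y = np.sin(X.ravel() / 10) + rng.normal(scale=0.1, size=300)
    X_train, y_train = X[:250], y[:250]
    X_test, y_test = X[250:], y[250:]

    # Block bootstrap of the training set, as advised for time series:
    # 10 resamplings with 10 blocks, matching the tutorial's choice.
    cv = BlockBootstrap(n_resamplings=10, n_blocks=10, overlapping=False, random_state=59)
    mapie_ts = MapieTimeSeriesRegressor(
        RandomForestRegressor(n_estimators=50), method="enbpi", cv=cv, agg_function="mean"
    )
    mapie_ts.fit(X_train, y_train)
    y_pred, y_pis = mapie_ts.predict(X_test, alpha=0.05, ensemble=True)

    # Online update of the residuals once true values are observed.
    mapie_ts.partial_fit(X_test[:10], y_test[:10])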
From 9c66c07665ade8f0635f85a8e4289b6457349de8 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 23 May 2024 11:38:08 +0200 Subject: [PATCH 125/128] Update HISTORY.rst --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index bf1572ad4..ed90ac803 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,7 @@ History * Fix conda versionning. * Reduce precision for test in `MapieCalibrator`. +* Add explanation and example for symmetry argument in CQR. 0.8.3 (2024-03-01) ------------------ From ded3f1e4d2fe779398b41502f83c87c5429fd911 Mon Sep 17 00:00:00 2001 From: Louis Lacombe Date: Thu, 23 May 2024 11:48:02 +0200 Subject: [PATCH 126/128] Fix formatting and indentation in regression tutorial --- .../regression/4-tutorials/plot_main-tutorial-regression.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/regression/4-tutorials/plot_main-tutorial-regression.py b/examples/regression/4-tutorials/plot_main-tutorial-regression.py index 33a324f8b..46dca8bc2 100644 --- a/examples/regression/4-tutorials/plot_main-tutorial-regression.py +++ b/examples/regression/4-tutorials/plot_main-tutorial-regression.py @@ -1,11 +1,10 @@ -""" +r""" =============================== Tutorial for tabular regression =============================== In this tutorial, we compare the prediction intervals estimated by MAPIE on a -simple, one-dimensional, ground truth function -:math:`f(x) = x \times \sin(x)`. +simple, one-dimensional, ground truth function :math:`f(x) = x \times \sin(x)`. Throughout this tutorial, we will answer the following questions: - How well do the MAPIE strategies capture the aleatoric uncertainty existing in the data? @@ -21,6 +20,7 @@ use the CV-minmax strategies.** """ + import os import warnings From 9dcca60b4656ee31a6e023b23c41b03b5414700f Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Mon, 27 May 2024 09:55:05 +0200 Subject: [PATCH 127/128] FIX: add some line breaks in doc --- doc/theoretical_description_metrics.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/theoretical_description_metrics.rst b/doc/theoretical_description_metrics.rst index 6ae010bb3..398fdd7bb 100644 --- a/doc/theoretical_description_metrics.rst +++ b/doc/theoretical_description_metrics.rst @@ -141,6 +141,7 @@ The ECE is then calculated using the following formula: \text{ECE} = \sum_{m=1}^M \frac{\left| B_m \right|}{n} \left| \text{acc}(B_m) - \text{conf}(B_m) \right| where: + - :math:`B_m` is the set of indices of samples that fall into the :math:`m`-th bin. - :math:`\left| B_m \right|` is the number of samples in the :math:`m`-th bin. - :math:`n` is the total number of samples. @@ -161,6 +162,7 @@ The Top-Label ECE is calculated as follows: \text{Top-Label ECE} = \frac{1}{L} \sum_{j=1}^L \sum_{i=1}^B \frac{|B_{i,j}|}{n_j} \left| \text{acc}(B_{i,j}) - \text{conf}(B_{i,j}) \right| where: + - :math:`L` is the number of unique labels. - :math:`B_{i,j}` is the set of indices of samples that fall into the :math:`i`-th bin for label :math:`j`. - :math:`\left| B_{i,j} \right|` is the number of samples in the :math:`i`-th bin for label :math:`j`. 
From 9f21fda2640e4febf698430200b7a4e32a2f2331 Mon Sep 17 00:00:00 2001
From: Louis Lacombe
Date: Mon, 27 May 2024 10:35:33 +0200
Subject: [PATCH 128/128] Update
 examples/regression/4-tutorials/plot_main-tutorial-regression.py

Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com>
---
 examples/regression/4-tutorials/plot_main-tutorial-regression.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/regression/4-tutorials/plot_main-tutorial-regression.py b/examples/regression/4-tutorials/plot_main-tutorial-regression.py
index 46dca8bc2..51d97c8f4 100644
--- a/examples/regression/4-tutorials/plot_main-tutorial-regression.py
+++ b/examples/regression/4-tutorials/plot_main-tutorial-regression.py
@@ -6,6 +6,7 @@
 In this tutorial, we compare the prediction intervals estimated by MAPIE on a
 simple, one-dimensional, ground truth function :math:`f(x) = x \times \sin(x)`.
 Throughout this tutorial, we will answer the following questions:
+
 - How well do the MAPIE strategies capture the aleatoric uncertainty
   existing in the data?
 - How do the prediction intervals estimated by the resampling strategies