fix specificity calculation for CBPE (#334)
* fix specificity calculation for CBPE

* Feature/multiclass confusion matrix (#287)

* create tester

* Updated tester

* multiclass cm performance estimation

* Multiclass confusion matrix calc. and estimation + docs and tests for both

* Removed scratch testing files

* updating MCM docs

* Re-align docs with main version

* [skip ci] Update CHANGELOG.md

---------

Co-authored-by: Nikolaos Perrakis <[email protected]>
Co-authored-by: Niels Nuyttens <[email protected]>

* Small refactor to checks in realized performance calculations to make them consistent with the dedicated realized performance calculator.

* Fix broken tests

* Fix linting errors due to merges

---------

Co-authored-by: Carter Blair <[email protected]>
Co-authored-by: Niels Nuyttens <[email protected]>
3 people authored Nov 9, 2023
1 parent 80507da commit 58d44bc
Showing 4 changed files with 586 additions and 65 deletions.
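The headline change is the binary specificity formula in CBPE's realized-performance check: the old line divided entries from the second row of the confusion matrix, cm[1, 1] / (cm[1, 0] + cm[1, 1]) = TP / (TP + FN), which is recall, not specificity. The fix unpacks the matrix and computes TN / (TN + FP). A minimal sketch of the corrected calculation, assuming binary 0/1 labels and scikit-learn's default label ordering; the helper name is illustrative and not part of NannyML:

import numpy as np
from sklearn.metrics import confusion_matrix


def realized_specificity(y_true, y_pred) -> float:
    # With labels [0, 1], ravel() yields the cell counts in the order tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    if tn + fp == 0:
        return np.nan  # no negative examples in this chunk
    return tn / (tn + fp)


# The old formula returns recall instead: on y_true=[0, 0, 1, 1], y_pred=[0, 1, 1, 1]
# it gives 1.0, while the actual specificity is 0.5.
print(realized_specificity([0, 0, 1, 1], [0, 1, 1, 1]))  # 0.5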
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Author: Niels Nuyttens <[email protected]>
#
# License: Apache Software License 2.0
import warnings
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score
import warnings

from nannyml._typing import ProblemType
from nannyml.base import _list_missing
Original file line number Diff line number Diff line change
@@ -7,24 +7,25 @@
# License: Apache Software License 2.0

"""Module containing metric utilities and implementations."""
import warnings
from typing import Dict, List, Optional, Tuple, Union # noqa: TYP001

import numpy as np
import pandas as pd
import warnings
from sklearn.metrics import (
accuracy_score,
confusion_matrix,
f1_score,
multilabel_confusion_matrix,
precision_score,
recall_score,
roc_auc_score,
confusion_matrix,
)
from sklearn.preprocessing import LabelBinarizer, label_binarize

from nannyml._typing import ProblemType, class_labels, model_output_column_names
from nannyml.base import _list_missing
from nannyml.chunk import Chunker
from nannyml.exceptions import InvalidArgumentsException
from nannyml.performance_calculation.metrics.base import Metric, MetricFactory, _common_data_cleaning
from nannyml.sampling_error.multiclass_classification import (
@@ -44,7 +45,6 @@
multiclass_confusion_matrix_sampling_error_components,
)
from nannyml.thresholds import Threshold, calculate_threshold_values
from nannyml.chunk import Chunker


@MetricFactory.register(metric='roc_auc', use_case=ProblemType.CLASSIFICATION_MULTICLASS)
@@ -674,7 +674,10 @@ def _get_components(self, classes: List[str]) -> List[Tuple[str, str]]:
for true_class in classes:
for pred_class in classes:
components.append(
(f"true class: '{true_class}', predicted class: '{pred_class}'", f'true_{true_class}_pred_{pred_class}')
(
f"true class: '{true_class}', predicted class: '{pred_class}'",
f'true_{true_class}_pred_{pred_class}',
)
)

return components
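The multiclass confusion-matrix feature referenced in the commit message (#287) flattens the matrix into one component per (true class, predicted class) pair, named true_<true>_pred_<pred> exactly as built above. A minimal sketch of that flattening, assuming scikit-learn's confusion_matrix with an explicit class ordering; the helper name and example class labels are illustrative, not NannyML API:

from itertools import product
from typing import Dict, List

import pandas as pd
from sklearn.metrics import confusion_matrix


def flatten_confusion_matrix(y_true: pd.Series, y_pred: pd.Series, classes: List[str]) -> Dict[str, int]:
    # Rows are true classes, columns are predicted classes, both in the order of `classes`.
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    return {
        f'true_{t}_pred_{p}': cm[i, j]
        for (i, t), (j, p) in product(enumerate(classes), enumerate(classes))
    }


# e.g. flatten_confusion_matrix(df['y_true'], df['y_pred'], classes=['a', 'b', 'c'])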
164 changes: 160 additions & 4 deletions nannyml/performance_estimation/confidence_based/metrics.py
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@

import abc
import logging
import warnings
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union

import numpy as np
@@ -402,6 +403,11 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
y_pred_proba, _, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized ROC-AUC.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized ROC-AUC.")
return np.NaN

return roc_auc_score(y_true, y_pred_proba)
@@ -494,6 +500,15 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized F1 score.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized F1 score.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized F1 score.")
return np.NaN

return f1_score(y_true=y_true, y_pred=y_pred)
@@ -570,6 +585,15 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized precision.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized precision.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized precision.")
return np.NaN

return precision_score(y_true=y_true, y_pred=y_pred)
@@ -644,6 +668,15 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized recall.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as recall precision.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as recall precision.")
return np.NaN

return recall_score(y_true=y_true, y_pred=y_pred)
@@ -718,10 +751,19 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized specificity.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized specificity.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized specificity.")
return np.NaN

conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred)
return conf_matrix[1, 1] / (conf_matrix[1, 0] + conf_matrix[1, 1])
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
return tn / (tn + fp)
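The nunique checks above are the same guard that this commit threads through every realized-performance method: warn and return NaN when targets are absent or when y_true / y_pred contain a single unique value, instead of calling the sklearn metric on degenerate chunk data. A standalone sketch of that pattern, assuming pandas Series inputs; the helper below is illustrative, not NannyML API:

import warnings
from typing import Callable, Optional

import numpy as np
import pandas as pd


def guarded_realized_metric(
    y_true: Optional[pd.Series], y_pred: Optional[pd.Series], metric_fn: Callable, metric_name: str
) -> float:
    # Guard clauses mirroring the checks added in this commit.
    if y_true is None:
        warnings.warn(f"No 'y_true' values given for chunk, returning NaN as realized {metric_name}.")
        return np.nan
    if y_true.nunique() <= 1:
        warnings.warn(f"Too few unique values present in 'y_true', returning NaN as realized {metric_name}.")
        return np.nan
    if y_pred is not None and y_pred.nunique() <= 1:
        warnings.warn(f"Too few unique values present in 'y_pred', returning NaN as realized {metric_name}.")
        return np.nan
    return metric_fn(y_true=y_true, y_pred=y_pred)


# e.g. guarded_realized_metric(chunk['y_true'], chunk['y_pred'], f1_score, "F1 score")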


def estimate_specificity(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> float:
@@ -797,6 +839,15 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized accuracy.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized accuracy.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized accuracy.")
return np.NaN

return accuracy_score(y_true=y_true, y_pred=y_pred)
@@ -961,6 +1012,15 @@ def _true_positive_realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized confusion matrix.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized confusion matrix.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized confusion matrix.")
return np.NaN

num_tp = np.sum(np.logical_and(y_pred, y_true))
@@ -980,6 +1040,7 @@ def _true_negative_realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized confusion matrix.")
return np.NaN

num_tn = np.sum(np.logical_and(np.logical_not(y_pred), np.logical_not(y_true)))
@@ -999,6 +1060,15 @@ def _false_positive_realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized confusion matrix.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized confusion matrix.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized confusion matrix.")
return np.NaN

num_tp = np.sum(np.logical_and(y_pred, y_true))
@@ -1018,6 +1088,15 @@ def _false_negative_realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized confusion matrix.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized confusion matrix.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized confusion matrix.")
return np.NaN

num_tp = np.sum(np.logical_and(y_pred, y_true))
@@ -1500,6 +1579,15 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
_, y_pred, y_true = self._common_cleaning(data, y_pred_proba_column_name=self.uncalibrated_y_pred_proba)

if y_true is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized business value.")
return np.NaN

if y_true.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized business value.")
return np.NaN

if y_pred.nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized business value.")
return np.NaN

tp_value = self.business_value_matrix[1, 1]
@@ -1677,7 +1765,13 @@ def _sampling_error(self, data: pd.DataFrame) -> float:

def _realized_performance(self, data: pd.DataFrame) -> float:
data = self._ensure_targets(data)

if data is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized ROC-AUC.")
return np.NaN

if data[self.y_true].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized ROC-AUC.")
return np.NaN

_, y_pred_probas, labels = _get_multiclass_uncalibrated_predictions(data, self.y_pred, self.y_pred_proba)
@@ -1734,7 +1828,17 @@ def _sampling_error(self, data: pd.DataFrame) -> float:

def _realized_performance(self, data: pd.DataFrame) -> float:
data = self._ensure_targets(data)

if data is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized F1 score.")
return np.NaN

if data[self.y_true].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized F1 score.")
return np.NaN

if data[self.y_pred].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized F1 score.")
return np.NaN

y_pred, _, labels = _get_multiclass_uncalibrated_predictions(data, self.y_pred, self.y_pred_proba)
@@ -1791,7 +1895,17 @@ def _sampling_error(self, data: pd.DataFrame) -> float:

def _realized_performance(self, data: pd.DataFrame) -> float:
data = self._ensure_targets(data)

if data is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized precision.")
return np.NaN

if data[self.y_true].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized precision.")
return np.NaN

if data[self.y_pred].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized precision.")
return np.NaN

y_pred, _, labels = _get_multiclass_uncalibrated_predictions(data, self.y_pred, self.y_pred_proba)
@@ -1848,7 +1962,17 @@ def _sampling_error(self, data: pd.DataFrame) -> float:

def _realized_performance(self, data: pd.DataFrame) -> float:
data = self._ensure_targets(data)

if data is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized recall.")
return np.NaN

if data[self.y_true].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized recall.")
return np.NaN

if data[self.y_pred].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized recall.")
return np.NaN

y_pred, _, labels = _get_multiclass_uncalibrated_predictions(data, self.y_pred, self.y_pred_proba)
@@ -1905,7 +2029,17 @@ def _sampling_error(self, data: pd.DataFrame) -> float:

def _realized_performance(self, data: pd.DataFrame) -> float:
data = self._ensure_targets(data)

if data is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized specificity.")
return np.NaN

if data[self.y_true].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized specificity.")
return np.NaN

if data[self.y_pred].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized specificity.")
return np.NaN

y_pred, _, labels = _get_multiclass_uncalibrated_predictions(data, self.y_pred, self.y_pred_proba)
@@ -1964,8 +2098,19 @@ def _sampling_error(self, data: pd.DataFrame) -> float:

def _realized_performance(self, data: pd.DataFrame) -> float:
data = self._ensure_targets(data)

if data is None:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized accuracy.")
return np.NaN

if data[self.y_true].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized accuracy.")
return np.NaN

if data[self.y_pred].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized accuracy.")
return np.NaN

y_pred, _, _ = _get_multiclass_uncalibrated_predictions(data, self.y_pred, self.y_pred_proba)
return accuracy_score(data[self.y_true], y_pred)

@@ -2011,7 +2156,10 @@ def _get_components(self, classes: List[str]) -> List[Tuple[str, str]]:
for true_class in classes:
for pred_class in classes:
components.append(
(f"true class: '{true_class}', predicted class: '{pred_class}'", f'true_{true_class}_pred_{pred_class}')
(
f"true class: '{true_class}', predicted class: '{pred_class}'",
f'true_{true_class}_pred_{pred_class}',
)
)

return components
@@ -2074,8 +2222,16 @@ def _multiclass_confusion_matrix_alert_thresholds(
return alert_thresholds

def _multi_class_confusion_matrix_realized_performance(self, data: pd.DataFrame) -> Union[np.ndarray, float]:
if data is None or self.y_true not in data.columns:
warnings.warn("No 'y_true' values given for chunk, returning NaN as realized precision.")
return np.NaN

if data[self.y_true].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_true', returning NaN as realized precision.")
return np.NaN

if self.y_true not in data.columns or data[self.y_true].isna().all():
if data[self.y_pred].nunique() <= 1:
warnings.warn("Too few unique values present in 'y_pred', returning NaN as realized precision.")
return np.NaN

cm = confusion_matrix(