Skip to content

Commit

Permalink
Some renaming
Browse files Browse the repository at this point in the history
No need for abbreviations here.
  • Loading branch information
nnansters committed Jul 18, 2024
1 parent 8582141 commit aaad143
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@
recall_sampling_error_components,
specificity_sampling_error,
specificity_sampling_error_components,
ap_sampling_error_components,
ap_sampling_error,
bv_sampling_error_components,
bv_sampling_error,
average_precision_sampling_error_components,
average_precision_sampling_error,
business_value_sampling_error_components,
business_value_sampling_error,
)
from nannyml.thresholds import Threshold, calculate_threshold_values

Expand Down Expand Up @@ -987,7 +987,7 @@ def _fit(self, reference_data: pd.DataFrame):
# sampling error
binarized_y_true = list(label_binarize(reference_data[self.y_true], classes=self.classes).T)
y_pred_proba = [reference_data[self.y_pred_proba[clazz]].T for clazz in self.classes]
self._sampling_error_components = ap_sampling_error_components(
self._sampling_error_components = average_precision_sampling_error_components(
y_true_reference=binarized_y_true, y_pred_proba_reference=y_pred_proba
)

Expand Down Expand Up @@ -1033,7 +1033,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
)
return np.NaN
else:
return ap_sampling_error(self._sampling_error_components, data)
return average_precision_sampling_error(self._sampling_error_components, data)


@MetricFactory.register(metric='business_value', use_case=ProblemType.CLASSIFICATION_MULTICLASS)
Expand Down Expand Up @@ -1146,7 +1146,7 @@ def _fit(self, reference_data: pd.DataFrame):
f"business_value_matrix has shape {self.business_value_matrix.shape} "
"but we have {num_classes} classes!"
)
self._sampling_error_components = bv_sampling_error_components(
self._sampling_error_components = business_value_sampling_error_components(
y_true_reference=data[self.y_true],
y_pred_reference=data[self.y_pred],
business_value_matrix=self.business_value_matrix,
Expand Down Expand Up @@ -1180,4 +1180,4 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
)
return np.NaN
else:
return bv_sampling_error(self._sampling_error_components, data)
return business_value_sampling_error(self._sampling_error_components, data)
11 changes: 6 additions & 5 deletions nannyml/performance_estimation/confidence_based/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2354,7 +2354,8 @@ def _fit(self, reference_data: pd.DataFrame):
"targets."
)
raise InvalidArgumentsException(
"y_pred_proba class and class probabilities dictionary does not match reference data.")
"y_pred_proba class and class probabilities dictionary does not match reference data."
)
# sampling error
binarized_y_true = list(label_binarize(reference_data[self.y_true], classes=self.classes).T)
y_pred_proba = [reference_data['uncalibrated_' + self.y_pred_proba[clazz]].T for clazz in self.classes]
Expand Down Expand Up @@ -3385,7 +3386,7 @@ def _fit(self, reference_data: pd.DataFrame):
# sampling error
binarized_y_true = list(label_binarize(reference_data[self.y_true], classes=self.classes).T)
y_pred_proba = [reference_data['uncalibrated_' + self.y_pred_proba[clazz]].T for clazz in self.classes]
self._sampling_error_components = mse.ap_sampling_error_components(
self._sampling_error_components = mse.average_precision_sampling_error_components(
y_true_reference=binarized_y_true, y_pred_proba_reference=y_pred_proba
)

Expand Down Expand Up @@ -3431,7 +3432,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
)
return np.NaN
else:
return mse.ap_sampling_error(self._sampling_error_components, data)
return mse.average_precision_sampling_error(self._sampling_error_components, data)

def _realized_performance(self, data: pd.DataFrame) -> float:
try:
Expand Down Expand Up @@ -3525,7 +3526,7 @@ def _fit(self, reference_data: pd.DataFrame):
f"business_value_matrix has shape {self.business_value_matrix.shape} "
f"but we have {num_classes} classes!"
)
self._sampling_error_components = mse.bv_sampling_error_components(
self._sampling_error_components = mse.business_value_sampling_error_components(
y_true_reference=data[self.y_true],
y_pred_reference=data[self.y_pred],
business_value_matrix=self.business_value_matrix,
Expand Down Expand Up @@ -3584,7 +3585,7 @@ def _sampling_error(self, data: pd.DataFrame) -> float:
warnings.warn(_message)
return np.NaN
else:
return mse.bv_sampling_error(self._sampling_error_components, data)
return mse.business_value_sampling_error(self._sampling_error_components, data)

def _realized_performance(self, data: pd.DataFrame) -> float:
try:
Expand Down
26 changes: 13 additions & 13 deletions nannyml/sampling_error/multiclass_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,9 @@ def multiclass_confusion_matrix_sampling_error(sampling_error_components: Tuple,
return standard_errors


def ap_sampling_error_components(y_true_reference: List[np.ndarray], y_pred_proba_reference: List[pd.Series]):
def average_precision_sampling_error_components(
y_true_reference: List[np.ndarray], y_pred_proba_reference: List[pd.Series]
):
"""Calculate sampling error components for AP using reference data.
The ``y_true_reference`` and ``y_pred_proba_reference`` lists represent the binarized target values and model
Expand Down Expand Up @@ -451,7 +453,7 @@ def _get_class_components(y_true_reference: np.ndarray, y_pred_proba_reference:
return class_components


def ap_sampling_error(sampling_error_components, data) -> float:
def average_precision_sampling_error(sampling_error_components, data) -> float:
"""Calculate the average precision (AP) sampling error for a chunk of data.
Parameters
Expand Down Expand Up @@ -479,16 +481,12 @@ def _calculate_business_value_per_row(
Intended to be used within a pandas apply function.
"""
cm = confusion_matrix(
y_true=np.array([row.y_true]),
y_pred=np.array([row.y_pred]),
labels=classes
)
cm = confusion_matrix(y_true=np.array([row.y_true]), y_pred=np.array([row.y_pred]), labels=classes)
bv = (cm * business_value_matrix).sum()
return bv


def bv_sampling_error_components(
def business_value_sampling_error_components(
y_true_reference: pd.Series,
y_pred_reference: pd.Series,
business_value_matrix: np.ndarray,
Expand All @@ -514,15 +512,17 @@ def bv_sampling_error_components(
-------
components: tuple
"""
data = pd.DataFrame({
'y_true': y_true_reference,
'y_pred': y_pred_reference,
})
data = pd.DataFrame(
{
'y_true': y_true_reference,
'y_pred': y_pred_reference,
}
)
bvs = data.apply(lambda x: _calculate_business_value_per_row(x, business_value_matrix, classes), axis=1)
return (bvs.std(), normalize_business_value)


def bv_sampling_error(sampling_error_components: Tuple, data) -> float:
def business_value_sampling_error(sampling_error_components: Tuple, data) -> float:
"""Calculate the business value sampling error for a chunk of data.
Parameters
Expand Down

0 comments on commit aaad143

Please sign in to comment.