fix #137 and #181 (faulty ROC AUC scores) #245

Open · wants to merge 3 commits into base: main
64 changes: 7 additions & 57 deletions matbench/data_ops.py
@@ -108,18 +108,13 @@ def score_array(true_array, pred_array, task_type):
     for metric in metrics:
         mfunc = METRIC_MAP[metric]

-        if metric == "rocauc":
-            # Both arrays must be in probability form
-            # if pred. array is given in probabilities
-            if isinstance(pred_array[0], float):
-                true_array = homogenize_clf_array(true_array, to_probs=True)
-
-        # Other clf metrics must always be converted to labels
-        elif metric in CLF_METRICS:
-            if isinstance(pred_array[0], float):
-                pred_array = homogenize_clf_array(pred_array, to_labels=True)
-
-        computed[metric] = mfunc(true_array, pred_array)
+        if metric in CLF_METRICS and metric != "rocauc":
+            # Discretize predictions if the metric is for classification and not rocauc.
+            pred_array_bool = (np.asarray(pred_array) > CLF_THRESH).tolist()
+            computed[metric] = mfunc(true_array, pred_array_bool)
+        else:
+            computed[metric] = mfunc(true_array, pred_array)

     return computed


@@ -154,51 +149,6 @@ def mean_absolute_percentage_error(y_true, y_pred, threshold=1e-5):
     return np.mean(np.fabs((y_true - y_pred) / y_true))


-def homogenize_clf_array(array, to_probs=False, to_labels=False, thresh=CLF_THRESH):
-    """
-    Homogenize an array of either:
-
-    1. labels (True, False) to probabilities (1.0, 0.0)
-    2. probabilities (between 0 and 1) to labels (True, False)
-       based on a threshold float
-
-    Args:
-        array ([bool], [float]): A list of bools or a list of floats 0-1.
-        to_probs (bool): Convert the input array to all probabilities.
-        to_labels (bool): Convert the input array to all labels based on
-            the threshold value thresh.
-        thresh (float): A number 0-1, which will decide the threshold
-            of probabilities if to_labels is True.
-
-    Returns:
-        list
-    """
-    if sum([to_probs, to_labels]) != 1:
-        raise ValueError(
-            "Set ONE of to_probs or to_labels to True to define "
-            "the conversion, NOT both."
-        )
-
-    if to_probs:
-        if all([isinstance(i, bool) for i in array]):
-            # The source array is bools
-            homogenized = [1.0 if i is True else 0.0 for i in array]
-            return homogenized
-        else:
-            raise TypeError(
-                "Cannot convert non-bool type in clf array to probabilities."
-            )
-    elif to_labels:
-        if all([isinstance(i, float) for i in array]):
-            # The source array is probabilities
-            homogenized = np.asarray(array) > thresh
-            return homogenized.tolist()
-        else:
-            raise TypeError(
-                "Cannot convert non-float types in clf array to labels."
-            )


 METRIC_MAP = {
     "mae": mean_absolute_error,
     "rmse": lambda true, pred: math.sqrt(mean_squared_error(true, pred)),
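Why this fixes the faulty ROC AUC scores (a reading of the diff above, not text from the PR itself): in the old loop, the first non-rocauc classification metric reassigned pred_array to thresholded labels, so roc_auc_score later received labels instead of probabilities and all ranking information was lost. The new code thresholds into a local pred_array_bool and leaves pred_array untouched. A minimal sketch of the fixed flow, assuming CLF_THRESH is 0.5 (consistent with the tests below) and that METRIC_MAP resolves to sklearn metrics:

import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score

true = [True, False]
pred = [0.7, 0.65]  # classifier probabilities

# Non-rocauc classification metrics score thresholded labels...
pred_bool = (np.asarray(pred) > 0.5).tolist()  # [True, True]
print(accuracy_score(true, pred_bool))         # 0.5

# ...while rocauc scores the raw probabilities, preserving the ranking.
print(roc_auc_score(true, pred))               # 1.0: the positive sample scores higher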
45 changes: 19 additions & 26 deletions matbench/tests/test_data_ops.py
@@ -5,7 +5,6 @@

 from matbench.constants import CLF_KEY, REG_KEY
 from matbench.data_ops import (
-    homogenize_clf_array,
     load,
     mean_absolute_percentage_error,
     score_array,
@@ -54,13 +53,23 @@ def test_downloads_mbv01(self):
             )
             self.assertIn(df[metadata["target"]].dtypes, test_types)

+    def assertDictAlmostEqual(self, first, second, **kwargs):
+        self.assertIsInstance(first, dict)
+        self.assertIsInstance(second, dict)
+        self.assertEqual(first.keys(), second.keys())
+        for key, value in first.items():
+            if isinstance(value, dict):
+                self.assertDictAlmostEqual(value, second[key], **kwargs)
+            else:
+                self.assertAlmostEqual(value, second[key], **kwargs)
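A note on why the helper above uses approximate comparison (an inference; the PR does not say): assertDictEqual compares float values exactly, which is brittle for recomputed metrics, whereas assertAlmostEqual checks that the difference rounds to zero at 7 decimal places by default. Illustrative only:

# Exact float equality can fail on representation error:
0.1 + 0.2 == 0.3                 # False
round(0.1 + 0.2 - 0.3, 7) == 0   # True; this is the assertAlmostEqual check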

     def test_score_array(self):
         # test for regression
         true = [1, 2, 3, 4]
         test = [1, 3, 3, 4]
         ans = score_array(true, test, task_type=REG_KEY)
         true_ans = {"mae": 0.25, "rmse": 0.5, "mape": 0.125, "max_error": 1}
-        self.assertDictEqual(ans, true_ans)
+        self.assertDictAlmostEqual(ans, true_ans)

         # test for classification
         true = [True, False]
@@ -72,13 +81,19 @@ def test_score_array(self):
"f1": 0.6666666666666666,
"rocauc": 0.5,
}
self.assertDictEqual(ans, true_ans)
self.assertDictAlmostEqual(ans, true_ans)

# test for probability clf
true = [True, False]
test = [0.7, 0.65]
ans = score_array(true, test, task_type=CLF_KEY)
self.assertDictEqual(ans, true_ans)
true_ans = {
"accuracy": 0.5,
"balanced_accuracy": 0.5,
"f1": 0.6666666666666666,
"rocauc": 1.0,
}
self.assertDictAlmostEqual(ans, true_ans)
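A worked check of the updated expectations, assuming METRIC_MAP maps f1 and rocauc to sklearn's f1_score and roc_auc_score (consistent with the mae and rmse entries shown earlier):

import numpy as np
from sklearn.metrics import f1_score, roc_auc_score

true, probs = [True, False], [0.7, 0.65]
labels = (np.asarray(probs) > 0.5).tolist()  # [True, True] after thresholding

f1_score(true, labels)      # 0.666...: precision 1/2, recall 1/1, f1 = 2*(0.5*1.0)/1.5
roc_auc_score(true, probs)  # 1.0 with probabilities; thresholded labels gave 0.5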

     def test_mean_absolute_percentage_error(self):

@@ -91,25 +106,3 @@ def test_mean_absolute_percentage_error(self):
         self.assertAlmostEqual(mape, 0.09999999999999999)
         self.assertAlmostEqual(mape, mape_masked)

-    def test_homogenize_clf_array(self):
-
-        bools = [True, False, True, True]
-        floats = [1.0, 0.3, 0.5001, 0.9]
-
-        probs = homogenize_clf_array(bools, to_probs=True)
-        self.assertAlmostEqual(probs[0], 1.0, places=5)
-        self.assertAlmostEqual(probs[1], 0.0, places=5)
-        self.assertAlmostEqual(probs[2], 1.0, places=5)
-        self.assertAlmostEqual(probs[3], 1.0, places=5)
-
-        labels = homogenize_clf_array(floats, to_labels=True, thresh=0.5)
-        self.assertTrue(labels[0])
-        self.assertFalse(labels[1])
-        self.assertTrue(labels[2])
-        self.assertTrue(labels[3])
-
-        labels2 = homogenize_clf_array(floats, to_labels=True, thresh=0.91)
-        self.assertTrue(labels2[0])
-        self.assertFalse(labels2[1])
-        self.assertFalse(labels2[2])
-        self.assertFalse(labels2[3])
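With homogenize_clf_array deleted from data_ops.py, its dedicated test goes too; the behavior it still covered, thresholding floats to labels, now lives inline in score_array. A one-line equivalent of the deleted to_labels path, reusing the removed test's inputs:

import numpy as np

floats = [1.0, 0.3, 0.5001, 0.9]
print((np.asarray(floats) > 0.5).tolist())   # [True, False, True, True]
print((np.asarray(floats) > 0.91).tolist())  # [True, False, False, False]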