fix #137 and #181 (faulty ROC AUC scores) #245

Open · wants to merge 3 commits into base: main
64 changes: 7 additions & 57 deletions matbench/data_ops.py
@@ -108,18 +108,13 @@ def score_array(true_array, pred_array, task_type):
     for metric in metrics:
         mfunc = METRIC_MAP[metric]

-        if metric == "rocauc":
-            # Both arrays must be in probability form
-            # if pred. array is given in probabilities
-            if isinstance(pred_array[0], float):
-                true_array = homogenize_clf_array(true_array, to_probs=True)
-
-        # Other clf metrics must always be converted to labels
-        elif metric in CLF_METRICS:
-            if isinstance(pred_array[0], float):
-                pred_array = homogenize_clf_array(pred_array, to_labels=True)
-
-        computed[metric] = mfunc(true_array, pred_array)
+        if metric in CLF_METRICS and metric != "rocauc":
+            # Discretize predictions if the metric is for classification and not rocauc.
+            pred_array_bool = (np.asarray(pred_array) > CLF_THRESH).tolist()
+            computed[metric] = mfunc(true_array, pred_array_bool)
+        else:
+            computed[metric] = mfunc(true_array, pred_array)

     return computed


@@ -154,51 +149,6 @@ def mean_absolute_percentage_error(y_true, y_pred, threshold=1e-5):
     return np.mean(np.fabs((y_true - y_pred) / y_true))


-def homogenize_clf_array(array, to_probs=False, to_labels=False, thresh=CLF_THRESH):
-    """
-    Homogenize an array of either:
-
-    1. labels (True, False) to probabilities (1.0, 0.0)
-    2. probabilities (between 0 and 1) to labels (True, False)
-       based on a threshold float
-
-    Args:
-        array ([bool], [float]): A list of bools or a list of floats 0-1.
-        to_probs (bool): Convert the input array to all probabilities.
-        to_labels (bool): Convert the input array to all labels based on
-            the threshold value thresh.
-        thresh (float): A number 0-1, which will decide the threshold
-            of probabilities if to_labels is True.
-
-    Returns:
-        list
-    """
-    if sum([to_probs, to_labels]) != 1:
-        raise ValueError(
-            "Set ONE of to_probs or to_labels to True to define "
-            "the conversion, NOT both."
-        )
-
-    if to_probs:
-        if all([isinstance(i, bool) for i in array]):
-            # The source array is bools
-            homogenized = [1.0 if i is True else 0.0 for i in array]
-            return homogenized
-        else:
-            raise TypeError(
-                "Cannot convert non-bool type in clf array to probabilities."
-            )
-    elif to_labels:
-        if all([isinstance(i, float) for i in array]):
-            # The source array is probabilities
-            homogenized = np.asarray(array) > thresh
-            return homogenized.tolist()
-        else:
-            raise TypeError(
-                "Cannot convert non-float types in clf array to labels."
-            )


 METRIC_MAP = {
     "mae": mean_absolute_error,
     "rmse": lambda true, pred: math.sqrt(mean_squared_error(true, pred)),
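Why this fixes the faulty ROC AUC scores (a reading of the diff above, not text from the PR itself): in the old loop, the first non-rocauc classification metric reassigned pred_array to thresholded labels, so roc_auc_score later received labels instead of probabilities and all ranking information was lost. The new code thresholds into a local pred_array_bool and leaves pred_array untouched. A minimal sketch of the fixed flow, assuming CLF_THRESH is 0.5 (consistent with the tests below) and that METRIC_MAP resolves to sklearn metrics:

import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score

true = [True, False]
pred = [0.7, 0.65]  # classifier probabilities

# Non-rocauc classification metrics score thresholded labels...
pred_bool = (np.asarray(pred) > 0.5).tolist()  # [True, True]
print(accuracy_score(true, pred_bool))         # 0.5

# ...while rocauc scores the raw probabilities, preserving the ranking.
print(roc_auc_score(true, pred))               # 1.0: the positive sample scores higher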
45 changes: 19 additions & 26 deletions matbench/tests/test_data_ops.py
@@ -5,7 +5,6 @@

 from matbench.constants import CLF_KEY, REG_KEY
 from matbench.data_ops import (
-    homogenize_clf_array,
     load,
     mean_absolute_percentage_error,
     score_array,
@@ -54,13 +53,23 @@ def test_downloads_mbv01(self):
             )
             self.assertIn(df[metadata["target"]].dtypes, test_types)

+    def assertDictAlmostEqual(self, first, second, **kwargs):
+        self.assertIsInstance(first, dict)
+        self.assertIsInstance(second, dict)
+        self.assertEqual(first.keys(), second.keys())
+        for key, value in first.items():
+            if isinstance(value, dict):
+                self.assertDictAlmostEqual(value, second[key], **kwargs)
+            else:
+                self.assertAlmostEqual(value, second[key], **kwargs)
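A note on why the helper above uses approximate comparison (an inference; the PR does not say): assertDictEqual compares float values exactly, which is brittle for recomputed metrics, whereas assertAlmostEqual checks that the difference rounds to zero at 7 decimal places by default. Illustrative only:

# Exact float equality can fail on representation error:
0.1 + 0.2 == 0.3                 # False
round(0.1 + 0.2 - 0.3, 7) == 0   # True; this is the assertAlmostEqual check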

     def test_score_array(self):
         # test for regression
         true = [1, 2, 3, 4]
         test = [1, 3, 3, 4]
         ans = score_array(true, test, task_type=REG_KEY)
         true_ans = {"mae": 0.25, "rmse": 0.5, "mape": 0.125, "max_error": 1}
-        self.assertDictEqual(ans, true_ans)
+        self.assertDictAlmostEqual(ans, true_ans)

         # test for classification
         true = [True, False]
@@ -72,13 +81,19 @@ def test_score_array(self):
"f1": 0.6666666666666666,
"rocauc": 0.5,
}
self.assertDictEqual(ans, true_ans)
self.assertDictAlmostEqual(ans, true_ans)

# test for probability clf
true = [True, False]
test = [0.7, 0.65]
ans = score_array(true, test, task_type=CLF_KEY)
self.assertDictEqual(ans, true_ans)
true_ans = {
"accuracy": 0.5,
"balanced_accuracy": 0.5,
"f1": 0.6666666666666666,
"rocauc": 1.0,
}
self.assertDictAlmostEqual(ans, true_ans)
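A worked check of the updated expectations, assuming METRIC_MAP maps f1 and rocauc to sklearn's f1_score and roc_auc_score (consistent with the mae and rmse entries shown earlier):

import numpy as np
from sklearn.metrics import f1_score, roc_auc_score

true, probs = [True, False], [0.7, 0.65]
labels = (np.asarray(probs) > 0.5).tolist()  # [True, True] after thresholding

f1_score(true, labels)      # 0.666...: precision 1/2, recall 1/1, f1 = 2*(0.5*1.0)/1.5
roc_auc_score(true, probs)  # 1.0 with probabilities; thresholded labels gave 0.5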

     def test_mean_absolute_percentage_error(self):

@@ -91,25 +106,3 @@ def test_mean_absolute_percentage_error(self):
         self.assertAlmostEqual(mape, 0.09999999999999999)
         self.assertAlmostEqual(mape, mape_masked)

-    def test_homogenize_clf_array(self):
-
-        bools = [True, False, True, True]
-        floats = [1.0, 0.3, 0.5001, 0.9]
-
-        probs = homogenize_clf_array(bools, to_probs=True)
-        self.assertAlmostEqual(probs[0], 1.0, places=5)
-        self.assertAlmostEqual(probs[1], 0.0, places=5)
-        self.assertAlmostEqual(probs[2], 1.0, places=5)
-        self.assertAlmostEqual(probs[3], 1.0, places=5)
-
-        labels = homogenize_clf_array(floats, to_labels=True, thresh=0.5)
-        self.assertTrue(labels[0])
-        self.assertFalse(labels[1])
-        self.assertTrue(labels[2])
-        self.assertTrue(labels[3])
-
-        labels2 = homogenize_clf_array(floats, to_labels=True, thresh=0.91)
-        self.assertTrue(labels2[0])
-        self.assertFalse(labels2[1])
-        self.assertFalse(labels2[2])
-        self.assertFalse(labels2[3])
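With homogenize_clf_array deleted from data_ops.py, its dedicated test goes too; the behavior it still covered, thresholding floats to labels, now lives inline in score_array. A one-line equivalent of the deleted to_labels path, reusing the removed test's inputs:

import numpy as np

floats = [1.0, 0.3, 0.5001, 0.9]
print((np.asarray(floats) > 0.5).tolist())   # [True, False, True, True]
print((np.asarray(floats) > 0.91).tolist())  # [True, False, False, False]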