sbi-dev · janfb · Jan 28, 2022 · Jun 3, 2021 · Jun 3, 2021 · Jun 3, 2021
diff --git a/sbi/utils/metrics.py b/sbi/utils/metrics.py
@@ -1,41 +1,178 @@
 # This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
 # under the Affero General Public License v3, see <https://www.gnu.org/licenses/>.
 
-from typing import Optional
+from typing import Any, Dict, Optional
 
 import numpy as np
 import torch
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import KFold, cross_val_score
 from sklearn.neural_network import MLPClassifier
 from torch import Tensor
 
-from sbi.utils import tensor2numpy
-
 
 def c2st(
     X: Tensor,
     Y: Tensor,
     seed: int = 1,
     n_folds: int = 5,
-    scoring: str = "accuracy",
+    metric: str = "accuracy",
+    classifier: str = "rf",
+) -> Tensor:
+    """
+    Return accuracy of classifier trained to distinguish samples from supposedly
+    two distributions <X> and <Y>. For details on the method, see [1,2].
+    If the returned accuracy is 0.5, <X> and <Y> are considered to be from the
+    same generating PDF, i.e. they can not be differentiated.
+    If the returned accuracy is around 1., <X> and <Y> are considered to be from
+    two different generating PDFs.
+
+    Training of the classifier with N-fold cross-validation [3] using sklearn.
+    By default, a `RandomForestClassifier` by from `sklearn.ensemble` is used
+    (<classifier> = 'rf'). Alternatively, a multi-layer perceptron is available
+    (<classifier> = 'mlp'). For a small study on the pros and cons for this
+    choice see [4]. Before both samples are ingested, they are normalized (z scored)
+    under the assumption that each dimension in X follows a normal distribution, i.e.
+    the mean(X) is subtracted from X and this difference is divided by std(X)
+    for every dimension.
+
+    If you need a more flexible interface which is able to take a sklearn
+    compatible classifier and more, see the `c2st_` method in this module.
+
+    Args:
+        X: Samples from one distribution.
+        Y: Samples from another distribution.
+        seed: Seed for the sklearn classifier and the KFold cross-validation
+        n_folds: Number of folds to use
+        metric: sklearn compliant metric to use for the scoring parameter of cross_val_score
+        classifier: classification architecture to use, possible values: 'rf' or 'mlp'
+
+    Return:
+        torch.tensor containing the mean accuracy score over the test sets
+        from cross-validation
+
+    Example:
+    ``` py
+    > c2st(X,Y)
+    [0.51904464] #X and Y likely come from the same PDF or ensemble
+    > c2st(P,Q)
+    [0.998456] #P and Q likely come from two different PDFs or ensembles
+    ```
+
+    References:
+        [1]: http://arxiv.org/abs/1610.06545
+        [2]: https://www.osti.gov/biblio/826696/
+        [3]: https://scikit-learn.org/stable/modules/cross_validation.html
+        [4]: https://github.com/psteinb/c2st/
+    """
+
+    # the default configuration
+    clf_class = RandomForestClassifier
+    clf_kwargs = {}
+
+    if "mlp" in classifier.lower():
+        ndim = X.shape[-1]
+        clf_class = MLPClassifier
+        clf_kwargs = {
+            "activation": "relu",
+            "hidden_layer_sizes": (10 * ndim, 10 * ndim),
+            "max_iter": 1000,
+            "solver": "adam",
+            "early_stopping": True,
+            "n_iter_no_change": 50,
+        }
+
+    noise_scale = None
+    z_score = True
+    verbosity = 0
+
+    scores_ = c2st_scores(
+        X,
+        Y,
+        seed=seed,
+        n_folds=n_folds,
+        metric=metric,
+        z_score=z_score,
+        noise_scale=noise_scale,
+        verbosity=verbosity,
+        clf_class=clf_class,
+        clf_kwargs=clf_kwargs,
+    )
+
+    # TODO: unclear why np.asarray needs to be used here
+    scores = np.asarray(np.mean(scores_)).astype(np.float32)
+    value = torch.from_numpy(np.atleast_1d(scores))
+    return value
+
+
+def c2st_scores(
+    X: Tensor,
+    Y: Tensor,
+    seed: int = 1,
+    n_folds: int = 5,
+    metric: str = "accuracy",
     z_score: bool = True,
     noise_scale: Optional[float] = None,
+    verbosity: int = 0,
+    clf_class: Any = RandomForestClassifier,
+    clf_kwargs: Dict[str, Any] = {},
 ) -> Tensor:
-    """Return accuracy of classifier trained to distinguish samples from two distributions.
+    """
+    Return accuracy of classifier trained to distinguish samples from supposedly
+    two distributions <X> and <Y>. For details on the method, see [1,2].
+    If the returned accuracy is 0.5, <X> and <Y> are considered to be from the
+    same generating PDF, i.e. they can not be differentiated.
+    If the returned accuracy is around 1., <X> and <Y> are considered to be from
+    two different generating PDFs.
+
+    This function performs training of the classifier with N-fold cross-validation [3] using sklearn.
+    By default, a `RandomForestClassifier` by from `sklearn.ensemble` is used which
+    is recommended based on the study performed in [4].
+    This can be changed using <clf_class>. This class is used in the following
+    fashion:
+
+    ``` py
+    clf = clf_class(random_state=seed, **clf_kwargs)
+    #...
+    scores = cross_val_score(
+        clf, data, target, cv=shuffle, scoring=scoring, verbose=verbosity
+    )
+    ```
+    Further configuration of the classifier can be performed using <clf_kwargs>.
+    If you like to provide a custom class for training, it has to satisfy the
+    internal requirements of `sklearn.model_selection.cross_val_score`.
 
-    Trains classifiers with N-fold cross-validation [1]. Scikit learn MLPClassifier are
-    used, with 2 hidden layers of 10x dim each, where dim is the dimensionality of the
-    samples X and Y.
     Args:
         X: Samples from one distribution.
         Y: Samples from another distribution.
-        seed: Seed for sklearn
-        n_folds: Number of folds
-        z_score: Z-scoring using X
-        noise_scale: If passed, will add Gaussian noise with std noise_scale to samples
+        seed: Seed for the sklearn classifier and the KFold cross validation
+        n_folds: Number of folds to use for cross validation
+        metric: sklearn compliant metric to use for the scoring parameter of cross_val_score
+        z_score: Z-scoring using X, i.e. mean and std deviation of X is used to normalize Y, i.e. Y=(Y - mean)/std
+        noise_scale: If passed, will add Gaussian noise with standard deviation <noise_scale> to samples of X and of Y
+        verbosity: control the verbosity of sklearn.model_selection.cross_val_score
+        clf_class: a scikit-learn classifier class
+        clf_kwargs: key-value arguments dictionary to the class specified by clf_class, e.g. sklearn.ensemble.RandomForestClassifier
+
+    Return:
+        np.ndarray containing the calculated <metric> scores over the test set
+        folds from cross-validation
+
+    Example:
+    ``` py
+    > c2st_scores(X,Y)
+    [0.51904464,0.5309201,0.4959452,0.5487709,0.50682926]
+    #X and Y likely come from the same PDF or ensemble
+    > c2st_scores(P,Q)
+    [0.998456,0.9982912,0.9980476,0.9980488,0.99805826]
+    #P and Q likely come from two different PDFs or ensembles
+    ```
 
     References:
-        [1]: https://scikit-learn.org/stable/modules/cross_validation.html
+        [1]: http://arxiv.org/abs/1610.06545
+        [2]: https://www.osti.gov/biblio/826696/
+        [3]: https://scikit-learn.org/stable/modules/cross_validation.html
+        [4]: https://github.com/psteinb/c2st/
     """
     if z_score:
         X_mean = torch.mean(X, axis=0)
@@ -47,27 +184,22 @@ def c2st(
         X += noise_scale * torch.randn(X.shape)
         Y += noise_scale * torch.randn(Y.shape)
 
-    X = tensor2numpy(X)
-    Y = tensor2numpy(Y)
+    X = X.cpu().numpy()
+    Y = Y.cpu().numpy()
 
-    ndim = X.shape[1]
-
-    clf = MLPClassifier(
-        activation="relu",
-        hidden_layer_sizes=(10 * ndim, 10 * ndim),
-        max_iter=1000,
-        solver="adam",
-        random_state=seed,
-    )
+    clf = clf_class(random_state=seed, **clf_kwargs)
 
+    # prepare data
     data = np.concatenate((X, Y))
+    # labels
     target = np.concatenate((np.zeros((X.shape[0],)), np.ones((Y.shape[0],))))
 
     shuffle = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
-    scores = cross_val_score(clf, data, target, cv=shuffle, scoring=scoring)
+    scores = cross_val_score(
+        clf, data, target, cv=shuffle, scoring=metric, verbose=verbosity
+    )
 
-    scores = np.asarray(np.mean(scores)).astype(np.float32)
-    return torch.from_numpy(np.atleast_1d(scores))
+    return scores
 
 
 def unbiased_mmd_squared(x, y):

diff --git a/tests/metrics_test.py b/tests/metrics_test.py
@@ -0,0 +1,132 @@
+# This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
+# under the Affero General Public License v3, see <https://www.gnu.org/licenses/>.
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+import torch
+from sklearn.neural_network import MLPClassifier
+from torch.distributions import MultivariateNormal as tmvn
+
+from sbi.utils.metrics import c2st, c2st_scores
+
+## c2st related:
+## for a study about c2st see https://github.com/psteinb/c2st/
+
+TESTCASECONFIG = [
+    (
+        # both samples are identical, the mean accuracy should be around 0.5
+        0.0,  # dist_sigma
+        0.45,  # c2st_lowerbound
+        0.55,  # c2st_upperbound
+    ),
+    (
+        # both samples are rather close, the mean accuracy should be larger than 0.5 and be lower than 1.
+        1.0,
+        0.85,
+        1.0,
+    ),
+    (
+        # both samples are very far apart, the mean accuracy should close to 1.
+        20.0,
+        0.98,
+        1.0,
+    ),
+]
+
+
+@pytest.mark.parametrize(
+    "dist_sigma, c2st_lowerbound, c2st_upperbound,",
+    TESTCASECONFIG,
+)
+def test_c2st_with_different_distributions(
+    dist_sigma, c2st_lowerbound, c2st_upperbound, set_seed
+):
+
+    ndim = 10
+    nsamples = 1024
+
+    refdist = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
+    otherdist = tmvn(
+        loc=dist_sigma + torch.zeros(ndim), covariance_matrix=torch.eye(ndim)
+    )
+
+    X = refdist.sample((nsamples,))
+    Y = otherdist.sample((nsamples,))
+
+    obs_c2st = c2st(X, Y)
+
+    assert len(obs_c2st) > 0
+    assert c2st_lowerbound < obs_c2st[0]
+    assert obs_c2st[0] <= c2st_upperbound
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "dist_sigma, c2st_lowerbound, c2st_upperbound,",
+    TESTCASECONFIG,
+)
+def test_c2st_with_different_distributions_mlp(
+    dist_sigma, c2st_lowerbound, c2st_upperbound, set_seed
+):
+
+    ndim = 10
+    nsamples = 1024
+
+    refdist = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
+    otherdist = tmvn(
+        loc=dist_sigma + torch.zeros(ndim), covariance_matrix=torch.eye(ndim)
+    )
+
+    X = refdist.sample((nsamples,))
+    Y = otherdist.sample((nsamples,))
+
+    obs_c2st = c2st(X, Y, classifier="mlp")
+
+    assert len(obs_c2st) > 0
+    assert c2st_lowerbound < obs_c2st[0]
+    assert obs_c2st[0] <= c2st_upperbound
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "dist_sigma, c2st_lowerbound, c2st_upperbound,",
+    TESTCASECONFIG,
+)
+def test_c2st_scores(dist_sigma, c2st_lowerbound, c2st_upperbound, set_seed):
+
+    ndim = 10
+    nsamples = 1024
+
+    xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
+    ynormal = tmvn(
+        loc=dist_sigma + torch.zeros(ndim), covariance_matrix=torch.eye(ndim)
+    )
+
+    X = xnormal.sample((nsamples,))
+    Y = ynormal.sample((nsamples,))
+
+    obs_c2st = c2st_scores(X, Y)
+
+    assert hasattr(obs_c2st, "mean")
+    assert c2st_lowerbound < obs_c2st.mean()
+    assert obs_c2st.mean() <= c2st_upperbound
+
+    clf_class = MLPClassifier
+    clf_kwargs = {
+        "activation": "relu",
+        "hidden_layer_sizes": (8 * X.shape[1], X.shape[1]),
+        "max_iter": 100,
+        "solver": "adam",
+        "early_stopping": True,
+        "n_iter_no_change": 20,
+    }
+
+    obs2_c2st = c2st_scores(X, Y, clf_class=clf_class, clf_kwargs=clf_kwargs)
+
+    assert hasattr(obs2_c2st, "mean")
+    assert c2st_lowerbound < obs2_c2st.mean()
+    assert obs2_c2st.mean() <= c2st_upperbound
+
+    assert np.allclose(obs2_c2st, obs_c2st, atol=0.05)