Refactor c2st and adding a unit test for metrics #503

Merged: 20 commits, Jan 28, 2022. Changes from 18 commits.
178 changes: 153 additions & 25 deletions sbi/utils/metrics.py
@@ -5,12 +5,11 @@

import numpy as np
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neural_network import MLPClassifier
from torch import Tensor



def c2st(
X: Tensor,
@@ -20,22 +19,156 @@ def c2st(
scoring: str = "accuracy",
z_score: bool = True,
noise_scale: Optional[float] = None,
verbosity: int = 0,
classifier: str = "rf",
) -> Tensor:
"""Return accuracy of classifier trained to distinguish samples from two distributions.
"""
Return accuracy of classifier trained to distinguish samples from supposedly
two distributions <X> and <Y>. For details on the method, see [1,2].
If the returned accuracy is 0.5, <X> and <Y> are considered to be from the
same generating PDF, i.e. they can not be differentiated.
If the returned accuracy is around 1., <X> and <Y> are considered to be from
two different generating PDFs.

Training of the classifier uses N-fold cross-validation [3] via sklearn. By
default, a `RandomForestClassifier` from `sklearn.ensemble` is used
(<classifier> = 'rf'). Alternatively, a multi-layer perceptron is available
(<classifier> = 'mlp'). For a small study on the pros and cons of this
choice, see [4]. If you need a more flexible interface which is able to take
any sklearn-compatible classifier, see the `c2st_scores` function in this
module.

Args:
X: Samples from one distribution.
Y: Samples from another distribution.
seed: Seed for the sklearn classifier and the KFold cross-validation.
n_folds: Number of folds to use.
scoring: Scoring method passed to cross_val_score, e.g. "accuracy".
z_score: Z-scoring using X, i.e. the mean and std deviation of X are used to normalize Y via (Y - mean)/std.
noise_scale: If passed, Gaussian noise with std noise_scale is added to the samples of X and of Y.
verbosity: Controls the verbosity of sklearn.model_selection.cross_val_score.
classifier: Classification architecture to use; possible values: 'rf' or 'mlp'.

Return:
torch.tensor containing the mean accuracy score over the test sets
from cross-validation.

Example:
``` py
> c2st(X, Y)
[0.51904464]  # X and Y likely come from the same PDF or ensemble
> c2st(P, Q)
[0.998456]  # P and Q likely come from two different PDFs or ensembles
```

References:
[1]: http://arxiv.org/abs/1610.06545
[2]: https://www.osti.gov/biblio/826696/
[3]: https://scikit-learn.org/stable/modules/cross_validation.html
[4]: https://github.com/psteinb/c2st/
"""

clf_class = RandomForestClassifier
clf_kwargs = {}
if "mlp" in classifier.lower():
ndim = X.shape[-1]
clf_class = MLPClassifier
clf_kwargs = {
"activation": "relu",
"hidden_layer_sizes": (10 * ndim, 10 * ndim),
"max_iter": 1000,
"solver": "adam",
"early_stopping": True,
"n_iter_no_change": 50,
}
elif "rf" in classifier.lower():
# keep this section for later use
pass

scores_ = c2st_scores(
X,
Y,
seed=seed,
n_folds=n_folds,
scoring=scoring,
z_score=z_score,
noise_scale=noise_scale,
verbosity=verbosity,
clf_class=clf_class,
clf_kwargs=clf_kwargs,
)

scores = np.asarray(np.mean(scores_)).astype(np.float32)
value = torch.from_numpy(np.atleast_1d(scores))
return value


def c2st_scores(
X: Tensor,
Y: Tensor,
seed: int = 1,
n_folds: int = 5,
scoring: str = "accuracy",
z_score: bool = True,
noise_scale: Optional[float] = None,
verbosity: int = 0,
clf_class=RandomForestClassifier,
clf_kwargs={},
) -> np.ndarray:
"""
Return accuracy of a classifier trained to distinguish samples from two
supposedly different distributions <X> and <Y>. For details on the method,
see [1, 2]. If the returned accuracy is 0.5, <X> and <Y> are considered to be
from the same generating PDF, i.e. they cannot be differentiated.
If the returned accuracy is around 1.0, <X> and <Y> are considered to be from
two different generating PDFs.

This function trains the classifier with N-fold cross-validation [3] using sklearn.
By default, a `RandomForestClassifier` from `sklearn.ensemble` is used, which
is recommended based on the study performed in [4].
This can be changed using <clf_class>. The class is used in the following
fashion:

``` py
clf = clf_class(random_state=seed, **clf_kwargs)
#...
scores = cross_val_score(
clf, data, target, cv=shuffle, scoring=scoring, verbose=verbosity
)
```
Further configuration of the classifier can be performed using <clf_kwargs>.
If you would like to provide a custom class for training, it has to satisfy
the internal requirements of `sklearn.model_selection.cross_val_score`.

Args:
X: Samples from one distribution.
Y: Samples from another distribution.
seed: Seed for the sklearn classifier and the KFold cross-validation.
n_folds: Number of folds to use.
scoring: Scoring method passed to cross_val_score, e.g. "accuracy".
z_score: Z-scoring using X, i.e. the mean and std deviation of X are used to normalize Y via (Y - mean)/std.
noise_scale: If passed, Gaussian noise with std noise_scale is added to the samples of X and of Y.
verbosity: Controls the verbosity of sklearn.model_selection.cross_val_score.
clf_class: A scikit-learn classifier class.
clf_kwargs: Keyword-argument dictionary passed to the class specified by clf_class, e.g. sklearn.ensemble.RandomForestClassifier.

Return:
np.ndarray containing the calculated <scoring> scores over the test set
folds from cross-validation.

Example:
``` py
> c2st_scores(X, Y)
[0.51904464, 0.5309201, 0.4959452, 0.5487709, 0.50682926]
# X and Y likely come from the same PDF or ensemble
> c2st_scores(P, Q)
[0.998456, 0.9982912, 0.9980476, 0.9980488, 0.99805826]
# P and Q likely come from two different PDFs or ensembles
```

References:
[1]: http://arxiv.org/abs/1610.06545
[2]: https://www.osti.gov/biblio/826696/
[3]: https://scikit-learn.org/stable/modules/cross_validation.html
[4]: https://github.com/psteinb/c2st/
"""
if z_score:
X_mean = torch.mean(X, axis=0)
@@ -47,27 +180,22 @@
X += noise_scale * torch.randn(X.shape)
Y += noise_scale * torch.randn(Y.shape)

X = X.cpu().numpy()
Y = Y.cpu().numpy()

clf = clf_class(random_state=seed, **clf_kwargs)

# prepare data
data = np.concatenate((X, Y))
# labels
target = np.concatenate((np.zeros((X.shape[0],)), np.ones((Y.shape[0],))))

shuffle = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
scores = cross_val_score(
clf, data, target, cv=shuffle, scoring=scoring, verbose=verbosity
)

return scores


def unbiased_mmd_squared(x, y):
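The docstrings above describe how `c2st` selects a built-in classifier and how `c2st_scores` accepts any sklearn-compatible class via <clf_class>. Below is a minimal sketch of both call paths; the sample tensors and the choice of `LogisticRegression` are illustrative assumptions, not part of this PR. Since `c2st_scores` instantiates the classifier as `clf_class(random_state=seed, **clf_kwargs)`, any custom class must accept a `random_state` keyword.

``` py
# Sketch only: exercising the refactored metrics under illustrative assumptions.
import torch
from sklearn.linear_model import LogisticRegression  # illustrative custom classifier

from sbi.utils.metrics import c2st, c2st_scores

X = torch.randn(1024, 10)        # samples from a standard normal
Y = torch.randn(1024, 10) + 1.0  # samples shifted by one sigma per dimension

# Default random-forest classifier; returns the mean accuracy over 5 folds.
accuracy = c2st(X, Y, seed=1)

# Flexible interface: per-fold scores with a custom sklearn-compatible class.
# LogisticRegression accepts random_state, which c2st_scores passes on.
scores = c2st_scores(
    X,
    Y,
    seed=1,
    clf_class=LogisticRegression,
    clf_kwargs={"max_iter": 1000},
)
print(accuracy, scores.mean())
```

Keeping `c2st` as a thin wrapper over `c2st_scores` means the convenience path and the flexible path share a single cross-validation implementation.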
150 changes: 150 additions & 0 deletions tests/metrics_test.py
@@ -0,0 +1,150 @@
# This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
# under the Affero General Public License v3, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import numpy as np
import pytest
import torch
from sklearn.neural_network import MLPClassifier
from torch.distributions import MultivariateNormal as tmvn

from sbi.utils.metrics import c2st, c2st_scores

## c2st related:
## for a study about c2st see https://github.com/psteinb/c2st/


def test_same_distributions(set_seed):

ndim = 10
nsamples = 1024

xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))

X = xnormal.sample((nsamples,))
Y = xnormal.sample((nsamples,))

obs_c2st = c2st(X, Y, seed=set_seed)

assert obs_c2st is not None
assert 0.45 < obs_c2st[0] < 0.55  # only by chance do we differentiate the two samples


def test_diff_distributions(set_seed):

ndim = 10
nsamples = 1024

xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
ynormal = tmvn(loc=20.0 * torch.ones(ndim), covariance_matrix=torch.eye(ndim))

X = xnormal.sample((nsamples,))
Y = ynormal.sample((nsamples,))

obs_c2st = c2st(X, Y, seed=set_seed)

assert obs_c2st is not None
assert (
0.98 < obs_c2st[0]
) # distributions do not overlap, classifiers label with high accuracy
print(obs_c2st)


def test_onesigma_apart_distributions(set_seed):

ndim = 10
nsamples = 1024

xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
ynormal = tmvn(loc=torch.ones(ndim), covariance_matrix=torch.eye(ndim))

X = xnormal.sample((nsamples,))
Y = ynormal.sample((nsamples,))

obs_c2st = c2st(X, Y, seed=set_seed)

assert obs_c2st is not None
print(obs_c2st)
assert (
0.85 < obs_c2st[0]
)  # distributions are one sigma apart per dimension; accuracy should be well above chance


@pytest.mark.slow
def test_same_distributions_mlp(set_seed):

ndim = 10
nsamples = 1024

xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))

X = xnormal.sample((nsamples,))
Y = xnormal.sample((nsamples,))

obs_c2st = c2st(X, Y, seed=set_seed, classifier="mlp")

assert obs_c2st is not None
assert 0.45 < obs_c2st[0] < 0.55  # only by chance do we differentiate the two samples


@pytest.mark.slow
def test_diff_distributions_flexible_mlp(set_seed):

ndim = 10
nsamples = 1024

xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
ynormal = tmvn(loc=20.0 * torch.ones(ndim), covariance_matrix=torch.eye(ndim))

X = xnormal.sample((nsamples,))
Y = ynormal.sample((nsamples,))

obs_c2st = c2st(X, Y, seed=set_seed, classifier="rf")

assert obs_c2st is not None
assert 0.95 < obs_c2st[0]

obs2_c2st = c2st(X, Y, seed=set_seed, classifier="mlp")

assert obs2_c2st is not None
assert 0.95 < obs2_c2st[0]  # distributions do not overlap; both classifiers label with high accuracy
assert np.allclose(obs2_c2st, obs_c2st, atol=0.01)


def test_c2st_scores(set_seed):

ndim = 10
nsamples = 1024

xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
ynormal = tmvn(loc=20 * torch.ones(ndim), covariance_matrix=torch.eye(ndim))

X = xnormal.sample((nsamples,))
Y = ynormal.sample((nsamples,))

obs_c2st = c2st_scores(X, Y, seed=set_seed)

print(obs_c2st)
assert (
0.9 < obs_c2st.mean()
) # distributions do not overlap, classifiers label with high accuracy

clf_class = MLPClassifier
clf_kwargs = {
"activation": "relu",
"hidden_layer_sizes": (8 * X.shape[1], X.shape[1]),
"max_iter": 100,
"solver": "adam",
"early_stopping": True,
"n_iter_no_change": 20,
}

obs2_c2st = c2st_scores(
X, Y, seed=set_seed, clf_class=clf_class, clf_kwargs=clf_kwargs
)

assert (
0.9 < obs2_c2st.mean()
) # distributions do not overlap, classifiers label with high accuracy
assert np.allclose(obs2_c2st, obs_c2st, atol=0.05)
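The tests rely on a `set_seed` fixture that is not defined in this diff; judging by the calls `c2st(X, Y, seed=set_seed)`, it comes from the repository's pytest conftest and provides an integer seed. A minimal sketch of such a fixture under that assumption; the project's actual fixture may differ:

``` py
# tests/conftest.py -- hypothetical minimal fixture; the actual one in sbi may differ.
import pytest
import torch


@pytest.fixture
def set_seed() -> int:
    """Seed torch for reproducibility and hand the integer seed to the test."""
    seed = 1
    torch.manual_seed(seed)
    return seed
```

With that in place, the fast tests run via `pytest tests/metrics_test.py -m "not slow"`, assuming the `slow` marker is registered in the project's pytest configuration.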