Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor c2st and adding a unit test for metrics #503

Merged
merged 20 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 159 additions & 27 deletions sbi/utils/metrics.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,178 @@
# This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
# under the Affero General Public License v3, see <https://www.gnu.org/licenses/>.

from typing import Optional
from typing import Any, Dict, Optional

import numpy as np
import torch
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neural_network import MLPClassifier
from torch import Tensor

from sbi.utils import tensor2numpy


def c2st(
X: Tensor,
Y: Tensor,
seed: int = 1,
n_folds: int = 5,
scoring: str = "accuracy",
metric: str = "accuracy",
classifier: str = "rf",
) -> Tensor:
"""
Return accuracy of classifier trained to distinguish samples from supposedly
two distributions <X> and <Y>. For details on the method, see [1,2].
If the returned accuracy is 0.5, <X> and <Y> are considered to be from the
same generating PDF, i.e. they can not be differentiated.
If the returned accuracy is around 1., <X> and <Y> are considered to be from
two different generating PDFs.

Training of the classifier with N-fold cross-validation [3] using sklearn.
By default, a `RandomForestClassifier` by from `sklearn.ensemble` is used
psteinb marked this conversation as resolved.
Show resolved Hide resolved
(<classifier> = 'rf'). Alternatively, a multi-layer perceptron is available
(<classifier> = 'mlp'). For a small study on the pros and cons for this
choice see [4]. Before both samples are ingested, they are normalized (z scored)
under the assumption that each dimension in X follows a normal distribution, i.e.
the mean(X) is subtracted from X and this difference is divided by std(X)
for every dimension.

If you need a more flexible interface which is able to take a sklearn
compatible classifier and more, see the `c2st_` method in this module.

Args:
X: Samples from one distribution.
Y: Samples from another distribution.
seed: Seed for the sklearn classifier and the KFold cross-validation
n_folds: Number of folds to use
metric: sklearn compliant metric to use for the scoring parameter of cross_val_score
classifier: classification architecture to use, possible values: 'rf' or 'mlp'

Return:
torch.tensor containing the mean accuracy score over the test sets
from cross-validation

Example:
``` py
> c2st(X,Y)
[0.51904464] #X and Y likely come from the same PDF or ensemble
> c2st(P,Q)
[0.998456] #P and Q likely come from two different PDFs or ensembles
```

References:
[1]: http://arxiv.org/abs/1610.06545
[2]: https://www.osti.gov/biblio/826696/
[3]: https://scikit-learn.org/stable/modules/cross_validation.html
[4]: https://github.com/psteinb/c2st/
"""

# the default configuration
clf_class = RandomForestClassifier
psteinb marked this conversation as resolved.
Show resolved Hide resolved
clf_kwargs = {}
psteinb marked this conversation as resolved.
Show resolved Hide resolved

if "mlp" in classifier.lower():
ndim = X.shape[-1]
clf_class = MLPClassifier
clf_kwargs = {
"activation": "relu",
"hidden_layer_sizes": (10 * ndim, 10 * ndim),
"max_iter": 1000,
"solver": "adam",
"early_stopping": True,
"n_iter_no_change": 50,
}

noise_scale = None
z_score = True
verbosity = 0

scores_ = c2st_scores(
X,
Y,
seed=seed,
n_folds=n_folds,
metric=metric,
z_score=z_score,
noise_scale=noise_scale,
verbosity=verbosity,
clf_class=clf_class,
clf_kwargs=clf_kwargs,
)

# TODO: unclear why np.asarray needs to be used here
scores = np.asarray(np.mean(scores_)).astype(np.float32)
value = torch.from_numpy(np.atleast_1d(scores))
return value


def c2st_scores(
psteinb marked this conversation as resolved.
Show resolved Hide resolved
X: Tensor,
Y: Tensor,
seed: int = 1,
n_folds: int = 5,
metric: str = "accuracy",
z_score: bool = True,
noise_scale: Optional[float] = None,
verbosity: int = 0,
clf_class: Any = RandomForestClassifier,
clf_kwargs: Dict[str, Any] = {},
) -> Tensor:
"""Return accuracy of classifier trained to distinguish samples from two distributions.
"""
Return accuracy of classifier trained to distinguish samples from supposedly
two distributions <X> and <Y>. For details on the method, see [1,2].
If the returned accuracy is 0.5, <X> and <Y> are considered to be from the
same generating PDF, i.e. they can not be differentiated.
If the returned accuracy is around 1., <X> and <Y> are considered to be from
two different generating PDFs.

This function performs training of the classifier with N-fold cross-validation [3] using sklearn.
By default, a `RandomForestClassifier` by from `sklearn.ensemble` is used which
is recommended based on the study performed in [4].
This can be changed using <clf_class>. This class is used in the following
fashion:

``` py
clf = clf_class(random_state=seed, **clf_kwargs)
#...
scores = cross_val_score(
clf, data, target, cv=shuffle, scoring=scoring, verbose=verbosity
)
```
Further configuration of the classifier can be performed using <clf_kwargs>.
If you like to provide a custom class for training, it has to satisfy the
internal requirements of `sklearn.model_selection.cross_val_score`.

Trains classifiers with N-fold cross-validation [1]. Scikit learn MLPClassifier are
used, with 2 hidden layers of 10x dim each, where dim is the dimensionality of the
samples X and Y.
Args:
X: Samples from one distribution.
Y: Samples from another distribution.
seed: Seed for sklearn
n_folds: Number of folds
z_score: Z-scoring using X
noise_scale: If passed, will add Gaussian noise with std noise_scale to samples
seed: Seed for the sklearn classifier and the KFold cross validation
n_folds: Number of folds to use for cross validation
metric: sklearn compliant metric to use for the scoring parameter of cross_val_score
z_score: Z-scoring using X, i.e. mean and std deviation of X is used to normalize Y, i.e. Y=(Y - mean)/std
noise_scale: If passed, will add Gaussian noise with standard deviation <noise_scale> to samples of X and of Y
verbosity: control the verbosity of sklearn.model_selection.cross_val_score
clf_class: a scikit-learn classifier class
clf_kwargs: key-value arguments dictionary to the class specified by clf_class, e.g. sklearn.ensemble.RandomForestClassifier

Return:
np.ndarray containing the calculated <metric> scores over the test set
folds from cross-validation

Example:
``` py
> c2st_scores(X,Y)
[0.51904464,0.5309201,0.4959452,0.5487709,0.50682926]
#X and Y likely come from the same PDF or ensemble
> c2st_scores(P,Q)
[0.998456,0.9982912,0.9980476,0.9980488,0.99805826]
#P and Q likely come from two different PDFs or ensembles
```

References:
[1]: https://scikit-learn.org/stable/modules/cross_validation.html
[1]: http://arxiv.org/abs/1610.06545
[2]: https://www.osti.gov/biblio/826696/
[3]: https://scikit-learn.org/stable/modules/cross_validation.html
[4]: https://github.com/psteinb/c2st/
"""
if z_score:
X_mean = torch.mean(X, axis=0)
Expand All @@ -47,27 +184,22 @@ def c2st(
X += noise_scale * torch.randn(X.shape)
Y += noise_scale * torch.randn(Y.shape)

X = tensor2numpy(X)
Y = tensor2numpy(Y)
X = X.cpu().numpy()
Y = Y.cpu().numpy()

ndim = X.shape[1]

clf = MLPClassifier(
psteinb marked this conversation as resolved.
Show resolved Hide resolved
activation="relu",
hidden_layer_sizes=(10 * ndim, 10 * ndim),
max_iter=1000,
solver="adam",
random_state=seed,
)
clf = clf_class(random_state=seed, **clf_kwargs)

# prepare data
data = np.concatenate((X, Y))
# labels
target = np.concatenate((np.zeros((X.shape[0],)), np.ones((Y.shape[0],))))

shuffle = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
scores = cross_val_score(clf, data, target, cv=shuffle, scoring=scoring)
scores = cross_val_score(
clf, data, target, cv=shuffle, scoring=metric, verbose=verbosity
)

scores = np.asarray(np.mean(scores)).astype(np.float32)
return torch.from_numpy(np.atleast_1d(scores))
return scores


def unbiased_mmd_squared(x, y):
Expand Down
132 changes: 132 additions & 0 deletions tests/metrics_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
psteinb marked this conversation as resolved.
Show resolved Hide resolved
# under the Affero General Public License v3, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

import numpy as np
import pytest
import torch
from sklearn.neural_network import MLPClassifier
from torch.distributions import MultivariateNormal as tmvn
psteinb marked this conversation as resolved.
Show resolved Hide resolved

from sbi.utils.metrics import c2st, c2st_scores

## c2st related:
## for a study about c2st see https://github.com/psteinb/c2st/

TESTCASECONFIG = [
(
# both samples are identical, the mean accuracy should be around 0.5
0.0, # dist_sigma
0.45, # c2st_lowerbound
0.55, # c2st_upperbound
),
(
# both samples are rather close, the mean accuracy should be larger than 0.5 and be lower than 1.
1.0,
0.85,
1.0,
),
(
# both samples are very far apart, the mean accuracy should close to 1.
20.0,
0.98,
1.0,
),
]


@pytest.mark.parametrize(
"dist_sigma, c2st_lowerbound, c2st_upperbound,",
TESTCASECONFIG,
)
def test_c2st_with_different_distributions(
dist_sigma, c2st_lowerbound, c2st_upperbound, set_seed
):

ndim = 10
nsamples = 1024

refdist = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
otherdist = tmvn(
loc=dist_sigma + torch.zeros(ndim), covariance_matrix=torch.eye(ndim)
)

X = refdist.sample((nsamples,))
Y = otherdist.sample((nsamples,))

obs_c2st = c2st(X, Y)

assert len(obs_c2st) > 0
assert c2st_lowerbound < obs_c2st[0]
assert obs_c2st[0] <= c2st_upperbound


@pytest.mark.slow
@pytest.mark.parametrize(
"dist_sigma, c2st_lowerbound, c2st_upperbound,",
TESTCASECONFIG,
)
def test_c2st_with_different_distributions_mlp(
dist_sigma, c2st_lowerbound, c2st_upperbound, set_seed
):

ndim = 10
nsamples = 1024

refdist = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
otherdist = tmvn(
loc=dist_sigma + torch.zeros(ndim), covariance_matrix=torch.eye(ndim)
)

X = refdist.sample((nsamples,))
Y = otherdist.sample((nsamples,))

obs_c2st = c2st(X, Y, classifier="mlp")

assert len(obs_c2st) > 0
assert c2st_lowerbound < obs_c2st[0]
assert obs_c2st[0] <= c2st_upperbound


@pytest.mark.slow
@pytest.mark.parametrize(
"dist_sigma, c2st_lowerbound, c2st_upperbound,",
TESTCASECONFIG,
)
def test_c2st_scores(dist_sigma, c2st_lowerbound, c2st_upperbound, set_seed):

ndim = 10
nsamples = 1024

xnormal = tmvn(loc=torch.zeros(ndim), covariance_matrix=torch.eye(ndim))
ynormal = tmvn(
loc=dist_sigma + torch.zeros(ndim), covariance_matrix=torch.eye(ndim)
)

X = xnormal.sample((nsamples,))
Y = ynormal.sample((nsamples,))

obs_c2st = c2st_scores(X, Y)

assert hasattr(obs_c2st, "mean")
assert c2st_lowerbound < obs_c2st.mean()
assert obs_c2st.mean() <= c2st_upperbound

clf_class = MLPClassifier
clf_kwargs = {
"activation": "relu",
"hidden_layer_sizes": (8 * X.shape[1], X.shape[1]),
"max_iter": 100,
"solver": "adam",
"early_stopping": True,
"n_iter_no_change": 20,
}

obs2_c2st = c2st_scores(X, Y, clf_class=clf_class, clf_kwargs=clf_kwargs)

assert hasattr(obs2_c2st, "mean")
assert c2st_lowerbound < obs2_c2st.mean()
assert obs2_c2st.mean() <= c2st_upperbound

assert np.allclose(obs2_c2st, obs_c2st, atol=0.05)