From 2833d7714c42b126d7c67e7d8aaf580ccb40abb5 Mon Sep 17 00:00:00 2001 From: janfb Date: Thu, 25 Jan 2024 17:01:21 +0100 Subject: [PATCH 01/12] refactor embedding net tets --- tests/embedding_net_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/embedding_net_test.py b/tests/embedding_net_test.py index eede90e4a..5a97aa985 100644 --- a/tests/embedding_net_test.py +++ b/tests/embedding_net_test.py @@ -58,8 +58,11 @@ def test_embedding_net_api(method, num_dim: int, embedding_net: str): else: raise NameError - _ = inference.append_simulations(theta, x).train(max_num_epochs=5) - posterior = inference.build_posterior().set_default_x(x_o) + _ = inference.append_simulations(theta, x).train(max_num_epochs=2) + posterior = inference.build_posterior( + mcmc_method="slice_np_vectorized", + mcmc_parameters=dict(num_chains=2, warmup_steps=10, thin=5), + ).set_default_x(x_o) s = posterior.sample((1,)) _ = posterior.potential(s) From 50122c5da92530516bb4e4074d4fc22b8d063f6e Mon Sep 17 00:00:00 2001 From: janfb Date: Thu, 25 Jan 2024 17:01:41 +0100 Subject: [PATCH 02/12] refactor snle tests --- tests/linearGaussian_snle_test.py | 77 +++++++++++++++---------------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index 242eb9220..a766e05a0 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -29,30 +29,38 @@ from .test_utils import check_c2st, get_prob_outside_uniform_prior +# mcmc params for fast testing. +mcmc_parameters = { + "method": "slice_np_vectorized", + "num_chains": 20, + "thin": 5, + "warmup_steps": 50, +} -@pytest.mark.parametrize("num_dim", (1, 3)) -def test_api_snl_on_linearGaussian(num_dim: int): - """Test API for inference on linear Gaussian model using SNL. - Avoids expensive computations by training on few simulations and generating few - posterior samples. - - Args: - num_dim: parameter dimension of the gaussian model - """ +@pytest.mark.parametrize("num_dim", (1,)) # dim 3 is tested below. +@pytest.mark.parametrize("prior_str", ("uniform", "gaussian")) +def test_api_snl_on_linearGaussian(num_dim: int, prior_str: str): + """Test SNLE API with different priors and different number of trials.""" num_samples = 10 + num_simulations = 100 - prior_mean = zeros(num_dim) - prior_cov = eye(num_dim) - prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov) + if prior_str == "gaussian": + prior_mean = zeros(num_dim) + prior_cov = eye(num_dim) + prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov) + else: + prior = BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim)) simulator, prior = prepare_for_sbi(diagonal_linear_gaussian, prior) density_estimator = likelihood_nn("maf", num_transforms=3) inference = SNLE(density_estimator=density_estimator, show_progress_bars=False) - theta, x = simulate_for_sbi(simulator, prior, 1000, simulation_batch_size=50) + theta, x = simulate_for_sbi( + simulator, prior, num_simulations, simulation_batch_size=num_simulations + ) likelihood_estimator = inference.append_simulations(theta, x).train( - training_batch_size=100 + training_batch_size=100, max_num_epochs=2 ) for num_trials in [1, 2]: @@ -64,26 +72,21 @@ def test_api_snl_on_linearGaussian(num_dim: int): proposal=prior, potential_fn=potential_fn, theta_transform=theta_transform, - thin=3, + **mcmc_parameters, ) posterior.sample(sample_shape=(num_samples,)) -def test_c2st_snl_on_linearGaussian(density_estimator="maf"): - """Test whether SNL infers well a simple example with available ground truth. - - This example has different number of parameters theta than number of x. This test - also acts as the only functional test for SNL not marked as slow. - - """ +def test_c2st_snl_on_linearGaussian_different_dims(model_str="maf"): + """Test SNLE on linear Gaussian task with different theta and x dims.""" theta_dim = 3 x_dim = 2 discard_dims = theta_dim - x_dim x_o = zeros(1, x_dim) - num_samples = 1000 - num_simulations = 3000 + num_samples = 500 + num_simulations = 1000 # likelihood_mean will be likelihood_shift+theta likelihood_shift = -1.0 * ones(x_dim) @@ -110,11 +113,11 @@ def test_c2st_snl_on_linearGaussian(density_estimator="maf"): ), prior, ) - density_estimator = likelihood_nn(model=density_estimator, num_transforms=3) + density_estimator = likelihood_nn(model=model_str, num_transforms=3) inference = SNLE(density_estimator=density_estimator, show_progress_bars=False) theta, x = simulate_for_sbi( - simulator, prior, num_simulations, simulation_batch_size=50 + simulator, prior, num_simulations, simulation_batch_size=num_simulations ) likelihood_estimator = inference.append_simulations(theta, x).train() potential_fn, theta_transform = likelihood_estimator_based_potential( @@ -124,18 +127,16 @@ def test_c2st_snl_on_linearGaussian(density_estimator="maf"): proposal=prior, potential_fn=potential_fn, theta_transform=theta_transform, - method="slice_np_vectorized", - num_chains=5, - thin=10, + **mcmc_parameters, ) samples = posterior.sample((num_samples,)) # Compute the c2st and assert it is near chance level of 0.5. - check_c2st(samples, target_samples, alg=f"snle_a-{density_estimator}") + check_c2st(samples, target_samples, alg=f"snle_a-model_str") @pytest.mark.slow -@pytest.mark.parametrize("num_dim", (1, 2)) +@pytest.mark.parametrize("num_dim", (2,)) @pytest.mark.parametrize("prior_str", ("uniform", "gaussian")) def test_c2st_and_map_snl_on_linearGaussian_different(num_dim: int, prior_str: str): """Test SNL on linear Gaussian, comparing to ground truth posterior via c2st. @@ -199,9 +200,7 @@ def test_c2st_and_map_snl_on_linearGaussian_different(num_dim: int, prior_str: s proposal=prior, potential_fn=potential_fn, theta_transform=theta_transform, - method="slice_np_vectorized", - thin=5, - num_chains=5, + **mcmc_parameters, ) samples = posterior.sample(sample_shape=(num_samples,)) @@ -309,8 +308,7 @@ def test_c2st_multi_round_snl_on_linearGaussian(num_trials: int): proposal=prior, potential_fn=potential_fn, theta_transform=theta_transform, - thin=5, - num_chains=20, + **mcmc_parameters, ) theta, x = simulate_for_sbi( @@ -327,8 +325,7 @@ def test_c2st_multi_round_snl_on_linearGaussian(num_trials: int): proposal=prior, potential_fn=potential_fn, theta_transform=theta_transform, - thin=5, - num_chains=20, + **mcmc_parameters, ) samples = posterior.sample(sample_shape=(num_samples,)) @@ -444,7 +441,7 @@ def test_api_snl_sampling_methods( num_simulations = 1000 x_o = zeros((num_trials, num_dim)) # Test for multiple chains is cheap when vectorized. - num_chains = 3 if sampling_method == "slice_np_vectorized" else 1 + num_chains = 10 if sampling_method == "slice_np_vectorized" else 1 if sampling_method == "rejection": sample_with = "rejection" elif ( @@ -491,7 +488,7 @@ def test_api_snl_sampling_methods( proposal=prior, theta_transform=theta_transform, method=sampling_method, - thin=3, + thin=5, num_chains=num_chains, init_strategy=init_strategy, ) From 5e8b13fbb9e52b56a3d4429e6e3d2b1bd9d9cba0 Mon Sep 17 00:00:00 2001 From: janfb Date: Thu, 25 Jan 2024 17:01:49 +0100 Subject: [PATCH 03/12] refactor snpe tests --- tests/linearGaussian_snpe_test.py | 47 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tests/linearGaussian_snpe_test.py b/tests/linearGaussian_snpe_test.py index 1fe2eb61e..53ac44cd5 100644 --- a/tests/linearGaussian_snpe_test.py +++ b/tests/linearGaussian_snpe_test.py @@ -43,24 +43,19 @@ @pytest.mark.parametrize("snpe_method", [SNPE_A, SNPE_C]) @pytest.mark.parametrize( - "num_dim, prior_str, num_trials", + "num_dim, prior_str", ( - (2, "gaussian", 1), - (2, "uniform", 1), - (1, "gaussian", 1), - # no iid x in snpe. - pytest.param(1, "gaussian", 2, marks=pytest.mark.xfail), - pytest.param(2, "gaussian", 2, marks=pytest.mark.xfail), + (2, "gaussian"), + (2, "uniform"), + (1, "gaussian"), ), ) -def test_c2st_snpe_on_linearGaussian( - snpe_method, num_dim: int, prior_str: str, num_trials: int -): +def test_c2st_snpe_on_linearGaussian(snpe_method, num_dim: int, prior_str: str): """Test whether SNPE infers well a simple example with available ground truth.""" - x_o = zeros(num_trials, num_dim) - num_samples = 5000 - num_simulations = 5000 + x_o = zeros(1, num_dim) + num_samples = 1000 + num_simulations = 2500 # likelihood_mean will be likelihood_shift+theta likelihood_shift = -1.0 * ones(num_dim) @@ -203,13 +198,7 @@ def test_density_estimators_on_linearGaussian(density_estimtor): def test_c2st_snpe_on_linearGaussian_different_dims(density_estimator="maf"): - """Test whether SNPE B/C infer well a simple example with available ground truth. - - This test uses a linear Gaussian example with different number of parameters and - data dimensions. It tests different density estimators. Additionally, it implicitly - tests whether the prior can be `None` and whether we can stop and resume training. - - """ + """Test SNPE on linear Gaussian with different theta and x dimensionality.""" theta_dim = 3 x_dim = 2 @@ -217,6 +206,7 @@ def test_c2st_snpe_on_linearGaussian_different_dims(density_estimator="maf"): x_o = zeros(1, x_dim) num_samples = 1000 + num_simulations = 2000 # likelihood_mean will be likelihood_shift+theta likelihood_shift = -1.0 * ones(x_dim) @@ -252,7 +242,9 @@ def test_c2st_snpe_on_linearGaussian_different_dims(density_estimator="maf"): ) # type: ignore - theta, x = simulate_for_sbi(simulator, prior, 2000, simulation_batch_size=1) + theta, x = simulate_for_sbi( + simulator, prior, num_simulations, simulation_batch_size=num_simulations + ) inference = inference.append_simulations(theta, x) posterior_estimator = inference.train( @@ -267,7 +259,7 @@ def test_c2st_snpe_on_linearGaussian_different_dims(density_estimator="maf"): samples = posterior.sample((num_samples,)) # Compute the c2st and assert it is near chance level of 0.5. - check_c2st(samples, target_samples, alg="snpe_c") + check_c2st(samples, target_samples, alg="snpe_c_different_dims") # Test multi-round SNPE. @@ -688,6 +680,7 @@ def test_example_posterior(snpe_method: type): """Return an inferred `NeuralPosterior` for interactive examination.""" num_dim = 2 x_o = zeros(1, num_dim) + num_simulations = 100 # likelihood_mean will be likelihood_shift+theta likelihood_shift = -1.0 * ones(num_dim) @@ -709,9 +702,15 @@ def test_example_posterior(snpe_method: type): inference = snpe_method(prior, show_progress_bars=False) theta, x = simulate_for_sbi( - simulator, prior, 1000, simulation_batch_size=10, num_workers=6 + simulator, + prior, + num_simulations, + simulation_batch_size=num_simulations, + num_workers=6, + ) + posterior_estimator = inference.append_simulations(theta, x).train( + max_num_epochs=2, **extra_kwargs ) - posterior_estimator = inference.append_simulations(theta, x).train(**extra_kwargs) if snpe_method == SNPE_A: posterior_estimator = inference.correct_for_proposal() posterior = DirectPosterior( From 04bd9e393e70ab7cb1de9083f7875287acd26c38 Mon Sep 17 00:00:00 2001 From: janfb Date: Thu, 25 Jan 2024 17:01:59 +0100 Subject: [PATCH 04/12] refactor snre tests --- tests/linearGaussian_snre_test.py | 39 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tests/linearGaussian_snre_test.py b/tests/linearGaussian_snre_test.py index f6e277f47..0c2a3be5b 100644 --- a/tests/linearGaussian_snre_test.py +++ b/tests/linearGaussian_snre_test.py @@ -35,8 +35,16 @@ get_prob_outside_uniform_prior, ) +# mcmc params for fast testing. +mcmc_parameters = { + "method": "slice_np_vectorized", + "num_chains": 20, + "thin": 5, + "warmup_steps": 50, +} -@pytest.mark.parametrize("num_dim", (1, 3)) + +@pytest.mark.parametrize("num_dim", (1,)) # dim 3 is tested below. @pytest.mark.parametrize("SNRE", (SNRE_B, SNRE_C)) def test_api_sre_on_linearGaussian(num_dim: int, SNRE: RatioEstimator): """Test inference API of SRE with linear Gaussian model. @@ -52,7 +60,9 @@ def test_api_sre_on_linearGaussian(num_dim: int, SNRE: RatioEstimator): simulator, prior = prepare_for_sbi(diagonal_linear_gaussian, prior) inference = SNRE(classifier="resnet", show_progress_bars=False) - theta, x = simulate_for_sbi(simulator, prior, 1000, simulation_batch_size=50) + theta, x = simulate_for_sbi( + simulator, prior, num_simulations=100, simulation_batch_size=100 + ) ratio_estimator = inference.append_simulations(theta, x).train(max_num_epochs=5) for num_trials in [1, 2]: @@ -64,7 +74,7 @@ def test_api_sre_on_linearGaussian(num_dim: int, SNRE: RatioEstimator): potential_fn=potential_fn, theta_transform=theta_transform, proposal=prior, - num_chains=2, + **mcmc_parameters, ) posterior.sample(sample_shape=(10,)) posterior.map(num_iter=1) @@ -82,7 +92,7 @@ def test_c2st_sre_on_linearGaussian(SNRE: RatioEstimator): theta_dim = 3 x_dim = 2 discard_dims = theta_dim - x_dim - num_samples = 1000 + num_samples = 500 num_simulations = 2100 likelihood_shift = -1.0 * ones( @@ -100,18 +110,14 @@ def test_c2st_sre_on_linearGaussian(SNRE: RatioEstimator): ), prior, ) - inference = SNRE( - classifier="resnet", - show_progress_bars=False, - ) + inference = SNRE(classifier="resnet", show_progress_bars=False) theta, x = simulate_for_sbi( simulator, prior, num_simulations, simulation_batch_size=100 ) ratio_estimator = inference.append_simulations(theta, x).train() - num_trials = 1 - x_o = zeros(num_trials, x_dim) + x_o = zeros(1, x_dim) target_samples = samples_true_posterior_linear_gaussian_mvn_prior_different_dims( x_o, likelihood_shift, @@ -128,13 +134,12 @@ def test_c2st_sre_on_linearGaussian(SNRE: RatioEstimator): potential_fn=potential_fn, theta_transform=theta_transform, proposal=prior, - thin=5, - num_chains=2, + **mcmc_parameters, ) samples = posterior.sample((num_samples,)) # Compute the c2st and assert it is near chance level of 0.5. - check_c2st(samples, target_samples, alg=f"snre-{num_trials}trials") + check_c2st(samples, target_samples, alg=f"snre-{SNRE.__name__}") @pytest.mark.slow @@ -218,9 +223,7 @@ def simulator(theta): potential_fn=potential_fn, theta_transform=theta_transform, proposal=prior, - method="slice_np_vectorized", - thin=5, - num_chains=5, + **mcmc_parameters, ) samples = posterior.sample(sample_shape=(num_samples,)) @@ -415,9 +418,7 @@ def test_api_sre_sampling_methods(sampling_method: str, prior_str: str): potential_fn, proposal=prior, theta_transform=theta_transform, - method=sampling_method, - thin=3, - num_chains=num_chains, + **mcmc_parameters.update({"method": sampling_method}), ) elif sample_with == "importance": posterior = ImportanceSamplingPosterior( From 6b1d1b3dc1273692b417ee536cb8c172364e0a99 Mon Sep 17 00:00:00 2001 From: janfb Date: Thu, 25 Jan 2024 17:02:11 +0100 Subject: [PATCH 05/12] refactor mcmc tests --- tests/mcmc_test.py | 97 +++++++++++----------------------------------- 1 file changed, 23 insertions(+), 74 deletions(-) diff --git a/tests/mcmc_test.py b/tests/mcmc_test.py index 2eb87bc5a..a51383d48 100644 --- a/tests/mcmc_test.py +++ b/tests/mcmc_test.py @@ -3,8 +3,6 @@ from __future__ import annotations -from math import ceil - import arviz as az import numpy as np import pytest @@ -28,7 +26,7 @@ diagonal_linear_gaussian, true_posterior_linear_gaussian_mvn_prior, ) -from sbi.utils import likelihood_nn, tensor2numpy +from sbi.utils import likelihood_nn from tests.test_utils import check_c2st @@ -74,7 +72,10 @@ def lp_f(x): @pytest.mark.parametrize("num_dim", (1, 2)) @pytest.mark.parametrize("slice_sampler", (SliceSamplerVectorized, SliceSamplerSerial)) -def test_c2st_slice_np_vectorized_on_Gaussian(num_dim: int, slice_sampler): +@pytest.mark.parametrize("num_workers", (1, 2)) +def test_c2st_slice_np_vectorized_parallelized_on_Gaussian( + num_dim: int, slice_sampler, num_workers: int +): """Test MCMC on Gaussian, comparing to ground truth target via c2st. Args: @@ -82,8 +83,8 @@ def test_c2st_slice_np_vectorized_on_Gaussian(num_dim: int, slice_sampler): """ num_samples = 500 - warmup = 500 - num_chains = 5 + warmup = 50 + num_chains = 10 if slice_sampler is SliceSamplerVectorized else 1 thin = 2 likelihood_shift = -1.0 * ones(num_dim) @@ -101,15 +102,11 @@ def lp_f(x): sampler = slice_sampler( log_prob_fn=lp_f, - init_params=np.zeros( - ( - num_chains, - num_dim, - ) - ).astype(np.float32), + init_params=np.zeros((num_chains, num_dim)).astype(np.float32), tuning=warmup, thin=thin, num_chains=num_chains, + num_workers=num_workers, ) samples = sampler.run(thin * (warmup + int(num_samples / num_chains))) assert samples.shape == ( @@ -130,60 +127,6 @@ def lp_f(x): check_c2st(samples, target_samples, alg=alg) -@pytest.mark.parametrize("vectorized", (False, True)) -@pytest.mark.parametrize("num_workers", (1, 10)) -def test_c2st_slice_np_parallelized(vectorized: bool, num_workers: int): - """Test MCMC on Gaussian, comparing to ground truth target via c2st. - - Args: - num_dim: parameter dimension of the gaussian model - - """ - num_dim = 2 - num_samples = 500 - warmup = 100 - num_chains = 10 - thin = 2 - - likelihood_shift = -1.0 * ones(num_dim) - likelihood_cov = 0.3 * eye(num_dim) - prior_mean = zeros(num_dim) - prior_cov = eye(num_dim) - x_o = zeros((1, num_dim)) - target_distribution = true_posterior_linear_gaussian_mvn_prior( - x_o[0], likelihood_shift, likelihood_cov, prior_mean, prior_cov - ) - target_samples = target_distribution.sample((num_samples,)) - - def lp_f(x): - return target_distribution.log_prob(torch.as_tensor(x, dtype=torch.float32)) - - initial_params = torch.zeros((num_chains, num_dim)) - - if not vectorized: - SliceSamplerMultiChain = SliceSamplerSerial - else: - SliceSamplerMultiChain = SliceSamplerVectorized - - posterior_sampler = SliceSamplerMultiChain( - init_params=tensor2numpy(initial_params), - log_prob_fn=lp_f, - num_chains=num_chains, - thin=thin, - verbose=False, - num_workers=num_workers, - ) - warmup_ = warmup * thin - num_samples_ = ceil((num_samples * thin) / num_chains) - samples = posterior_sampler.run(warmup_ + num_samples_) # chains x samples x dim - samples = samples[:, warmup:, :] # discard warmup steps - samples = torch.as_tensor(samples, dtype=torch.float32).reshape(-1, num_dim) - - check_c2st( - samples, target_samples, alg=f"slice_np {'vectorized' if vectorized else ''}" - ) - - @pytest.mark.parametrize( "method", ( @@ -194,9 +137,9 @@ def lp_f(x): "slice_np_vectorized", ), ) -@pytest.mark.parametrize("num_chains", (1, 2)) -def test_getting_inference_diagnostics(method, num_chains): - num_samples = 100 +def test_getting_inference_diagnostics(method): + num_simulations = 100 + num_samples = 10 num_dim = 2 # Use composed prior to test MultipleIndependent case. @@ -209,9 +152,11 @@ def test_getting_inference_diagnostics(method, num_chains): density_estimator = likelihood_nn("maf", num_transforms=3) inference = SNLE(density_estimator=density_estimator, show_progress_bars=False) - theta, x = simulate_for_sbi(simulator, prior, 1000, simulation_batch_size=50) + theta, x = simulate_for_sbi( + simulator, prior, num_simulations, simulation_batch_size=num_simulations + ) likelihood_estimator = inference.append_simulations(theta, x).train( - training_batch_size=100 + training_batch_size=num_simulations, max_num_epochs=2 ) x_o = zeros((1, num_dim)) @@ -222,10 +167,14 @@ def test_getting_inference_diagnostics(method, num_chains): proposal=prior, potential_fn=potential_fn, theta_transform=theta_transform, - thin=3, - num_chains=num_chains, + thin=2, + warmup_steps=10, + num_chains=1, + ) + posterior.sample( + sample_shape=(num_samples,), + method=method, ) - posterior.sample(sample_shape=(num_samples,), method=method) idata = posterior.get_arviz_inference_data() az.plot_trace(idata) From 982683e30da8c9cd571f844ca48357a88b8526c8 Mon Sep 17 00:00:00 2001 From: janfb Date: Thu, 25 Jan 2024 17:02:51 +0100 Subject: [PATCH 06/12] refactor sbc tests --- tests/posterior_sampler_test.py | 3 ++- tests/sbc_test.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/posterior_sampler_test.py b/tests/posterior_sampler_test.py index b397ee23f..b82b9d397 100644 --- a/tests/posterior_sampler_test.py +++ b/tests/posterior_sampler_test.py @@ -66,9 +66,10 @@ def test_api_posterior_sampler_set(sampling_method: str, set_seed): sample_shape=(num_samples, num_chains), x=x_o, mcmc_parameters={ - "thin": 3, + "thin": 2, "num_chains": num_chains, "init_strategy": "prior", + "warmup_steps": 10, }, ) diff --git a/tests/sbc_test.py b/tests/sbc_test.py index 63416ead6..5f9a9d437 100644 --- a/tests/sbc_test.py +++ b/tests/sbc_test.py @@ -49,7 +49,11 @@ def simulator(theta): training_batch_size=100, max_num_epochs=max_num_epochs ) if method == SNLE: - posterior_kwargs = {"sample_with": "mcmc" if sampler == "mcmc" else "vi"} + posterior_kwargs = { + "sample_with": "mcmc" if sampler == "mcmc" else "vi", + "mcmc_method": "slice_np_vectorized", + "mcmc_parameters": {"num_chains": 10, "thin": 5, "warmup_steps": 10}, + } else: posterior_kwargs = {} From 5fc268d8fde23867660539f1ca43256db9341e74 Mon Sep 17 00:00:00 2001 From: Jan Boelts Date: Fri, 26 Jan 2024 10:11:54 +0100 Subject: [PATCH 07/12] add fast multi round test for snle and snre --- tests/linearGaussian_snle_test.py | 51 ++++++++++++++++--------------- tests/linearGaussian_snre_test.py | 49 +++++++++++++++-------------- tests/multidimensional_x_test.py | 4 ++- 3 files changed, 54 insertions(+), 50 deletions(-) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index a766e05a0..d685f4b1e 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -3,6 +3,8 @@ from __future__ import annotations +from os import X_OK + import pytest import torch from torch import eye, ones, zeros @@ -40,8 +42,9 @@ @pytest.mark.parametrize("num_dim", (1,)) # dim 3 is tested below. @pytest.mark.parametrize("prior_str", ("uniform", "gaussian")) -def test_api_snl_on_linearGaussian(num_dim: int, prior_str: str): - """Test SNLE API with different priors and different number of trials.""" +def test_api_snle_multiple_trials_and_rounds_map(num_dim: int, prior_str: str): + """Test SNLE API with 2 rounds, different priors num trials and MAP.""" + num_rounds = 2 num_samples = 10 num_simulations = 100 @@ -53,31 +56,31 @@ def test_api_snl_on_linearGaussian(num_dim: int, prior_str: str): prior = BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim)) simulator, prior = prepare_for_sbi(diagonal_linear_gaussian, prior) - density_estimator = likelihood_nn("maf", num_transforms=3) - inference = SNLE(density_estimator=density_estimator, show_progress_bars=False) - - theta, x = simulate_for_sbi( - simulator, prior, num_simulations, simulation_batch_size=num_simulations - ) - likelihood_estimator = inference.append_simulations(theta, x).train( - training_batch_size=100, max_num_epochs=2 - ) + inference = SNLE(prior=prior, density_estimator="mdn", show_progress_bars=False) - for num_trials in [1, 2]: - x_o = zeros((num_trials, num_dim)) - potential_fn, theta_transform = likelihood_estimator_based_potential( - prior=prior, likelihood_estimator=likelihood_estimator, x_o=x_o + proposals = [prior] + for _ in range(num_rounds): + theta, x = simulate_for_sbi( + simulator, + proposals[-1], + num_simulations, + simulation_batch_size=num_simulations, ) - posterior = MCMCPosterior( - proposal=prior, - potential_fn=potential_fn, - theta_transform=theta_transform, - **mcmc_parameters, + inference.append_simulations(theta, x).train( + training_batch_size=100, max_num_epochs=2 ) - posterior.sample(sample_shape=(num_samples,)) - - -def test_c2st_snl_on_linearGaussian_different_dims(model_str="maf"): + for num_trials in [1, 3]: + x_o = zeros((num_trials, num_dim)) + posterior = inference.build_posterior( + mcmc_method="slice_np_vectorized", + mcmc_parameters=dict(num_chains=10, thin=5, warmup_steps=10), + ).set_default_x(x_o) + posterior.sample(sample_shape=(num_samples,)) + proposals.append(posterior) + posterior.map(num_iter=1) + + +def test_c2st_snl_on_linear_gaussian_different_dims(model_str="maf"): """Test SNLE on linear Gaussian task with different theta and x dims.""" theta_dim = 3 diff --git a/tests/linearGaussian_snre_test.py b/tests/linearGaussian_snre_test.py index 0c2a3be5b..aa6062c2e 100644 --- a/tests/linearGaussian_snre_test.py +++ b/tests/linearGaussian_snre_test.py @@ -46,37 +46,36 @@ @pytest.mark.parametrize("num_dim", (1,)) # dim 3 is tested below. @pytest.mark.parametrize("SNRE", (SNRE_B, SNRE_C)) -def test_api_sre_on_linearGaussian(num_dim: int, SNRE: RatioEstimator): - """Test inference API of SRE with linear Gaussian model. - - Avoids intense computation for fast testing of API etc. - - Args: - num_dim: parameter dimension of the Gaussian model - """ +def test_api_snre_multiple_trials_and_rounds_map(num_dim: int, SNRE: RatioEstimator): + """Test SNRE API with 2 rounds, different priors num trials and MAP.""" + num_rounds = 2 + num_samples = 10 + num_simulations = 100 prior = MultivariateNormal(loc=zeros(num_dim), covariance_matrix=eye(num_dim)) simulator, prior = prepare_for_sbi(diagonal_linear_gaussian, prior) - inference = SNRE(classifier="resnet", show_progress_bars=False) - - theta, x = simulate_for_sbi( - simulator, prior, num_simulations=100, simulation_batch_size=100 - ) - ratio_estimator = inference.append_simulations(theta, x).train(max_num_epochs=5) - - for num_trials in [1, 2]: - x_o = zeros(num_trials, num_dim) - potential_fn, theta_transform = ratio_estimator_based_potential( - ratio_estimator=ratio_estimator, prior=prior, x_o=x_o + inference = SNRE(prior=prior, classifier="mlp", show_progress_bars=False) + + proposals = [prior] + for _ in range(num_rounds): + theta, x = simulate_for_sbi( + simulator, + proposals[-1], + num_simulations, + simulation_batch_size=num_simulations, ) - posterior = MCMCPosterior( - potential_fn=potential_fn, - theta_transform=theta_transform, - proposal=prior, - **mcmc_parameters, + inference.append_simulations(theta, x).train( + training_batch_size=100, max_num_epochs=2 ) - posterior.sample(sample_shape=(10,)) + for num_trials in [1, 3]: + x_o = zeros((num_trials, num_dim)) + posterior = inference.build_posterior( + mcmc_method="slice_np_vectorized", + mcmc_parameters=dict(num_chains=10, thin=5, warmup_steps=10), + ).set_default_x(x_o) + posterior.sample(sample_shape=(num_samples,)) + proposals.append(posterior) posterior.map(num_iter=1) diff --git a/tests/multidimensional_x_test.py b/tests/multidimensional_x_test.py index b4a5650f4..7f13242a4 100644 --- a/tests/multidimensional_x_test.py +++ b/tests/multidimensional_x_test.py @@ -125,7 +125,9 @@ def test_inference_with_2d_x(embedding, method): theta_transform=theta_transform, proposal=prior, method="slice_np_vectorized", - num_chains=2, + num_chains=10, + warmup_steps=10, + thin=5, ) posterior.potential(posterior.sample((num_samples,), show_progress_bars=False)) From e34c905b704a38e5e84346ac4ce0a720abc8ef1a Mon Sep 17 00:00:00 2001 From: Jan Boelts Date: Fri, 26 Jan 2024 10:58:29 +0100 Subject: [PATCH 08/12] edits by black v24.1.0 --- sbi/inference/abc/abc_base.py | 1 + sbi/inference/abc/mcabc.py | 1 + sbi/inference/abc/smcabc.py | 1 + sbi/inference/posteriors/mcmc_posterior.py | 18 +++++++++--------- sbi/neural_nets/mnle.py | 6 +++--- sbi/utils/user_input_checks_utils.py | 6 +++--- tests/inference_on_device_test.py | 8 +++++--- 7 files changed, 23 insertions(+), 18 deletions(-) diff --git a/sbi/inference/abc/abc_base.py b/sbi/inference/abc/abc_base.py index 3540ffac9..1b20c9e24 100644 --- a/sbi/inference/abc/abc_base.py +++ b/sbi/inference/abc/abc_base.py @@ -1,4 +1,5 @@ """Base class for Approximate Bayesian Computation methods.""" + import logging from abc import ABC from typing import Callable, Union diff --git a/sbi/inference/abc/mcabc.py b/sbi/inference/abc/mcabc.py index 1b09d6a46..aac09bba9 100644 --- a/sbi/inference/abc/mcabc.py +++ b/sbi/inference/abc/mcabc.py @@ -1,4 +1,5 @@ """Monte-Carlo Approximate Bayesian Computation (Rejection ABC).""" + from typing import Any, Callable, Dict, Optional, Tuple, Union import torch diff --git a/sbi/inference/abc/smcabc.py b/sbi/inference/abc/smcabc.py index 3ece908f4..9c94fa8db 100644 --- a/sbi/inference/abc/smcabc.py +++ b/sbi/inference/abc/smcabc.py @@ -1,4 +1,5 @@ """Sequential Monte Carlo Approximate Bayesian Computation.""" + from typing import Any, Callable, Dict, Optional, Tuple, Union import numpy as np diff --git a/sbi/inference/posteriors/mcmc_posterior.py b/sbi/inference/posteriors/mcmc_posterior.py index d15896ab3..784309702 100644 --- a/sbi/inference/posteriors/mcmc_posterior.py +++ b/sbi/inference/posteriors/mcmc_posterior.py @@ -117,9 +117,9 @@ def __init__( v0.19.0. Instead, use e.g., `init_strategy_parameters={"num_candidate_samples": 1000}`""" ) - self.init_strategy_parameters[ - "num_candidate_samples" - ] = init_strategy_num_candidates + self.init_strategy_parameters["num_candidate_samples"] = ( + init_strategy_num_candidates + ) self.potential_ = self._prepare_potential(method) @@ -239,9 +239,9 @@ def sample( v0.19.0. Instead, use e.g., `init_strategy_parameters={"num_candidate_samples": 1000}`""" ) - self.init_strategy_parameters[ - "num_candidate_samples" - ] = init_strategy_num_candidates + self.init_strategy_parameters["num_candidate_samples"] = ( + init_strategy_num_candidates + ) if sample_with is not None: raise ValueError( f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting " @@ -661,9 +661,9 @@ def get_arviz_inference_data(self) -> InferenceData: self._posterior_sampler is not None ), """No samples have been generated, call .sample() first.""" - sampler: Union[ - MCMC, SliceSamplerSerial, SliceSamplerVectorized - ] = self._posterior_sampler + sampler: Union[MCMC, SliceSamplerSerial, SliceSamplerVectorized] = ( + self._posterior_sampler + ) # If Pyro sampler and samples not transformed, use arviz' from_pyro. # Exclude 'slice' kernel as it lacks the 'divergence' diagnostics key. diff --git a/sbi/neural_nets/mnle.py b/sbi/neural_nets/mnle.py index 16086c2c1..52022df40 100644 --- a/sbi/neural_nets/mnle.py +++ b/sbi/neural_nets/mnle.py @@ -81,9 +81,9 @@ def build_mnle( # Set up a NSF for modelling the continuous data, conditioned on the discrete data. cont_nle = build_nsf( - batch_x=torch.log(cont_x) - if log_transform_x - else cont_x, # log transform manually. + batch_x=( + torch.log(cont_x) if log_transform_x else cont_x + ), # log transform manually. batch_y=torch.cat((batch_y, disc_x), dim=1), # condition on discrete data too. z_score_y=z_score_y, z_score_x=z_score_x, diff --git a/sbi/utils/user_input_checks_utils.py b/sbi/utils/user_input_checks_utils.py index 5958e37b0..d36ceeb4e 100644 --- a/sbi/utils/user_input_checks_utils.py +++ b/sbi/utils/user_input_checks_utils.py @@ -166,9 +166,9 @@ def __init__( super().__init__( batch_shape=batch_shape, event_shape=event_shape, - validate_args=prior._validate_args - if validate_args is None - else validate_args, + validate_args=( + prior._validate_args if validate_args is None else validate_args + ), ) self.prior = prior diff --git a/tests/inference_on_device_test.py b/tests/inference_on_device_test.py index 894c8eeeb..5caa91434 100644 --- a/tests/inference_on_device_test.py +++ b/tests/inference_on_device_test.py @@ -423,9 +423,11 @@ def test_embedding_nets_integration_training_device( .to(data_device) ) - with pytest.warns( - UserWarning - ) if data_device != training_device else nullcontext(): + with ( + pytest.warns(UserWarning) + if data_device != training_device + else nullcontext() + ): density_estimator_append = inference.append_simulations(theta, X) density_estimator_train = density_estimator_append.train( From c2c08b6f2374de54f48b50bd28ec7c7cf6918443 Mon Sep 17 00:00:00 2001 From: Jan Boelts Date: Fri, 26 Jan 2024 13:18:18 +0100 Subject: [PATCH 09/12] refactor snle, snre and embedding net tests. --- tests/linearGaussian_snle_test.py | 2 +- tests/linearGaussian_snre_test.py | 2 +- tests/multidimensional_x_test.py | 53 ++++++++++--------------------- 3 files changed, 18 insertions(+), 39 deletions(-) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index d685f4b1e..ed4d63c01 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -45,7 +45,7 @@ def test_api_snle_multiple_trials_and_rounds_map(num_dim: int, prior_str: str): """Test SNLE API with 2 rounds, different priors num trials and MAP.""" num_rounds = 2 - num_samples = 10 + num_samples = 1 num_simulations = 100 if prior_str == "gaussian": diff --git a/tests/linearGaussian_snre_test.py b/tests/linearGaussian_snre_test.py index aa6062c2e..636c955ef 100644 --- a/tests/linearGaussian_snre_test.py +++ b/tests/linearGaussian_snre_test.py @@ -50,7 +50,7 @@ def test_api_snre_multiple_trials_and_rounds_map(num_dim: int, SNRE: RatioEstima """Test SNRE API with 2 rounds, different priors num trials and MAP.""" num_rounds = 2 - num_samples = 10 + num_samples = 1 num_simulations = 100 prior = MultivariateNormal(loc=zeros(num_dim), covariance_matrix=eye(num_dim)) diff --git a/tests/multidimensional_x_test.py b/tests/multidimensional_x_test.py index 7f13242a4..037edf0df 100644 --- a/tests/multidimensional_x_test.py +++ b/tests/multidimensional_x_test.py @@ -60,16 +60,14 @@ def forward(self, x): SNLE, marks=pytest.mark.xfail(reason="SNLE cannot handle multiD x."), ), - pytest.param( - CNNEmbedding, - SNRE, - ), + pytest.param(CNNEmbedding, SNRE), pytest.param(CNNEmbedding, SNPE), ), ) -def test_inference_with_2d_x(embedding, method): +def test_inference_with_embedding_nets(embedding, method): + """Test embedding nets with SNPE, SNLE and SNRE.""" num_dim = 2 - num_samples = 10 + num_samples = 1 num_simulations = 100 prior = utils.BoxUniform(zeros(num_dim), torch.ones(num_dim)) @@ -79,10 +77,7 @@ def test_inference_with_2d_x(embedding, method): theta_o = torch.ones(1, num_dim) if method == SNPE: - net_provider = utils.posterior_nn( - model="mdn", - embedding_net=embedding(), - ) + net_provider = utils.posterior_nn(model="mdn", embedding_net=embedding()) num_trials = 1 elif method == SNLE: net_provider = utils.likelihood_nn(model="mdn", embedding_net=embedding()) @@ -99,35 +94,19 @@ def test_inference_with_2d_x(embedding, method): inference = method(classifier=net_provider, show_progress_bars=False) else: inference = method(density_estimator=net_provider, show_progress_bars=False) - theta, x = simulate_for_sbi(simulator, prior, num_simulations) - estimator = inference.append_simulations(theta, x).train( - training_batch_size=100, max_num_epochs=10 + theta, x = simulate_for_sbi( + simulator, prior, num_simulations, simulation_batch_size=num_simulations + ) + inference.append_simulations(theta, x).train( + training_batch_size=100, max_num_epochs=2 ) x_o = simulator(theta_o.repeat(num_trials, 1)) - if method == SNLE: - potential_fn, theta_transform = likelihood_estimator_based_potential( - estimator, prior, x_o - ) - elif method == SNPE: - potential_fn, theta_transform = posterior_estimator_based_potential( - estimator, prior, x_o - ) - elif method == SNRE: - potential_fn, theta_transform = ratio_estimator_based_potential( - estimator, prior, x_o - ) - else: - raise NotImplementedError - - posterior = MCMCPosterior( - potential_fn=potential_fn, - theta_transform=theta_transform, - proposal=prior, - method="slice_np_vectorized", - num_chains=10, - warmup_steps=10, - thin=5, - ) + posterior = inference.build_posterior( + prior=prior, + sample_with="rejection" if method == SNPE else "mcmc", + mcmc_method="slice_np_vectorized", + mcmc_parameters={"num_chains": 10, "thin": 5, "warmup_steps": 5}, + ).set_default_x(x_o) posterior.potential(posterior.sample((num_samples,), show_progress_bars=False)) From be133dc172846135d1640dcbb5d807acefe0e985 Mon Sep 17 00:00:00 2001 From: Jan Boelts Date: Fri, 26 Jan 2024 13:27:06 +0100 Subject: [PATCH 10/12] remove high d x test in favor of embedding tests. --- tests/embedding_net_test.py | 11 ++- tests/multidimensional_x_test.py | 112 ------------------------------- 2 files changed, 5 insertions(+), 118 deletions(-) delete mode 100644 tests/multidimensional_x_test.py diff --git a/tests/embedding_net_test.py b/tests/embedding_net_test.py index 5a97aa985..e9222e167 100644 --- a/tests/embedding_net_test.py +++ b/tests/embedding_net_test.py @@ -3,8 +3,9 @@ import pytest import torch from torch import eye, ones, zeros +from torch.distributions import MultivariateNormal -from sbi import utils as utils +from sbi import utils from sbi.inference import SNLE, SNPE, SNRE, simulate_for_sbi from sbi.neural_nets.embedding_nets import ( CNNEmbedding, @@ -70,7 +71,8 @@ def test_embedding_net_api(method, num_dim: int, embedding_net: str): @pytest.mark.parametrize("num_trials", [1, 2]) @pytest.mark.parametrize("num_dim", [1, 2]) -def test_iid_embedding_api(num_trials, num_dim): +def test_embedding_api_with_multiple_trials(num_trials, num_dim): + """Tests the API when using iid trial-based data.""" prior = utils.BoxUniform(-2.0 * ones(num_dim), 2.0 * ones(num_dim)) num_thetas = 1000 @@ -100,7 +102,7 @@ def test_iid_embedding_api(num_trials, num_dim): @pytest.mark.slow def test_iid_embedding_varying_num_trials(trial_factor=50, max_num_trials=20): - """Test embedding net with varying number of trials.""" + """Test inference accuracy with embeddings for varying number of trials.""" num_dim = 2 prior = torch.distributions.MultivariateNormal( torch.zeros(num_dim), torch.eye(num_dim) @@ -244,9 +246,6 @@ def simulator(theta, num_trials=num_trials): @pytest.mark.parametrize("input_shape", [(32,), (32, 32), (32, 64)]) @pytest.mark.parametrize("num_channels", (1, 2, 3)) def test_1d_and_2d_cnn_embedding_net(input_shape, num_channels): - import torch - from torch.distributions import MultivariateNormal - estimator_provider = posterior_nn( "mdn", embedding_net=CNNEmbedding( diff --git a/tests/multidimensional_x_test.py b/tests/multidimensional_x_test.py deleted file mode 100644 index 037edf0df..000000000 --- a/tests/multidimensional_x_test.py +++ /dev/null @@ -1,112 +0,0 @@ -from __future__ import annotations - -import pytest -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch import zeros - -from sbi import utils as utils -from sbi.inference import ( - SNLE, - SNPE, - SNRE, - MCMCPosterior, - likelihood_estimator_based_potential, - posterior_estimator_based_potential, - prepare_for_sbi, - ratio_estimator_based_potential, - simulate_for_sbi, -) - - -# Minimal 2D simulator. -def simulator_2d(theta): - x = torch.rand(theta.shape[0], 32, 32) - return x - - -class CNNEmbedding(nn.Module): - """Big CNN embedding net to levarage GPU computation.""" - - def __init__(self): - super().__init__() - # 2D convolutional layer - self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, padding=2) - self.conv2 = nn.Conv2d(in_channels=16, out_channels=6, kernel_size=5, padding=2) - # Maxpool layer that reduces 32x32 image to 4x4 - self.pool = nn.MaxPool2d(kernel_size=8, stride=8) - # Fully connected layer taking as input the 6 flattened output arrays from the - # maxpooling layer - self.fc = nn.Linear(in_features=6 * 4 * 4, out_features=8) - - def forward(self, x): - x = x.view(-1, 1, 32, 32) - x = F.relu(self.conv1(x)) - x = self.pool(F.relu(self.conv2(x))) - x = x.reshape(-1, 6 * 4 * 4) - x = F.relu(self.fc(x)) - return x - - -@pytest.mark.parametrize( - "embedding, method", - ( - pytest.param( - nn.Identity, SNPE, marks=pytest.mark.xfail(reason="Invalid embedding.") - ), - pytest.param( - nn.Identity, - SNLE, - marks=pytest.mark.xfail(reason="SNLE cannot handle multiD x."), - ), - pytest.param(CNNEmbedding, SNRE), - pytest.param(CNNEmbedding, SNPE), - ), -) -def test_inference_with_embedding_nets(embedding, method): - """Test embedding nets with SNPE, SNLE and SNRE.""" - num_dim = 2 - num_samples = 1 - num_simulations = 100 - - prior = utils.BoxUniform(zeros(num_dim), torch.ones(num_dim)) - - simulator, prior = prepare_for_sbi(simulator_2d, prior) - - theta_o = torch.ones(1, num_dim) - - if method == SNPE: - net_provider = utils.posterior_nn(model="mdn", embedding_net=embedding()) - num_trials = 1 - elif method == SNLE: - net_provider = utils.likelihood_nn(model="mdn", embedding_net=embedding()) - num_trials = 2 - else: - net_provider = utils.classifier_nn( - model="mlp", - z_score_theta="structured", # Test that structured z-scoring works. - embedding_net_x=embedding(), - ) - num_trials = 2 - - if method == SNRE: - inference = method(classifier=net_provider, show_progress_bars=False) - else: - inference = method(density_estimator=net_provider, show_progress_bars=False) - theta, x = simulate_for_sbi( - simulator, prior, num_simulations, simulation_batch_size=num_simulations - ) - inference.append_simulations(theta, x).train( - training_batch_size=100, max_num_epochs=2 - ) - x_o = simulator(theta_o.repeat(num_trials, 1)) - - posterior = inference.build_posterior( - prior=prior, - sample_with="rejection" if method == SNPE else "mcmc", - mcmc_method="slice_np_vectorized", - mcmc_parameters={"num_chains": 10, "thin": 5, "warmup_steps": 5}, - ).set_default_x(x_o) - - posterior.potential(posterior.sample((num_samples,), show_progress_bars=False)) From 40b74388475e82cf01eb0dfb359bfee4aea995ac Mon Sep 17 00:00:00 2001 From: Jan Boelts Date: Fri, 26 Jan 2024 13:28:12 +0100 Subject: [PATCH 11/12] remove spurios import. --- tests/linearGaussian_snle_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index ed4d63c01..c65ff8983 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -3,8 +3,6 @@ from __future__ import annotations -from os import X_OK - import pytest import torch from torch import eye, ones, zeros From bd4c83be982a898fdc70ce01c07f62ab64088200 Mon Sep 17 00:00:00 2001 From: Jan Boelts Date: Fri, 26 Jan 2024 13:48:02 +0100 Subject: [PATCH 12/12] refactoring --- tests/linearGaussian_snle_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index c65ff8983..d8f1974b1 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -133,11 +133,11 @@ def test_c2st_snl_on_linear_gaussian_different_dims(model_str="maf"): samples = posterior.sample((num_samples,)) # Compute the c2st and assert it is near chance level of 0.5. - check_c2st(samples, target_samples, alg=f"snle_a-model_str") + check_c2st(samples, target_samples, alg=f"snle_a-{model_str}") @pytest.mark.slow -@pytest.mark.parametrize("num_dim", (2,)) +@pytest.mark.parametrize("num_dim", (1, 2)) @pytest.mark.parametrize("prior_str", ("uniform", "gaussian")) def test_c2st_and_map_snl_on_linearGaussian_different(num_dim: int, prior_str: str): """Test SNL on linear Gaussian, comparing to ground truth posterior via c2st.