From 16597684916caa133ffbfe94514d96e791eaacc1 Mon Sep 17 00:00:00 2001 From: Michael Deistler Date: Fri, 3 Nov 2023 09:59:06 +0100 Subject: [PATCH] Deployed bfe2d2c with MkDocs version: 1.5.3 --- 404.html | 34 +-- citation/index.html | 34 +-- code_of_conduct/index.html | 34 +-- contribute/index.html | 34 +-- credits/index.html | 34 +-- examples/00_HH_simulator/index.html | 34 +-- examples/01_decision_making_model/index.html | 34 +-- faq/index.html | 34 +-- faq/question_01/index.html | 34 +-- faq/question_02/index.html | 34 +-- faq/question_03/index.html | 34 +-- faq/question_04/index.html | 34 +-- faq/question_05/index.html | 34 +-- faq/question_06/index.html | 34 +-- faq/question_07/index.html | 34 +-- index.html | 43 ++-- install/index.html | 34 +-- reference/index.html | 34 +-- search/search_index.json | 2 +- sitemap.xml | 72 +++--- sitemap.xml.gz | Bin 648 -> 648 bytes tutorial/00_getting_started/index.html | 151 ++---------- tutorial/01_gaussian_amortized/index.html | 82 +++---- tutorial/02_flexible_interface/index.html | 46 ++-- tutorial/03_multiround_inference/index.html | 38 +-- tutorial/04_density_estimators/index.html | 38 +-- tutorial/05_embedding_net/index.html | 38 +-- .../07_conditional_distributions/index.html | 34 +-- tutorial/08_restriction_estimator/index.html | 54 ++--- tutorial/09_sensitivity_analysis/index.html | 34 +-- .../10_crafting_summary_statistics/index.html | 54 ++--- tutorial/11_sampler_interface/index.html | 219 +++++++----------- .../index.html | 38 +-- .../index.html | 34 +-- .../index.html | 121 +++++----- .../index.html | 34 +-- .../15_mcmc_diagnostics_with_arviz/index.html | 34 +-- tutorial/16_implemented_methods/index.html | 38 +-- .../index.html | 34 +-- tutorial/17_vi_posteriors/index.html | 134 ++--------- 40 files changed, 600 insertions(+), 1350 deletions(-) diff --git a/404.html b/404.html index a5afcb9c4..9a38a1ec9 100644 --- a/404.html +++ b/404.html @@ -302,20 +302,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -331,8 +317,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -406,8 +392,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -448,8 +434,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -462,8 +448,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -476,8 +462,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • diff --git a/citation/index.html b/citation/index.html index 2ce834eff..2a454de6c 100644 --- a/citation/index.html +++ b/citation/index.html @@ -309,20 +309,6 @@ -
diff --git a/index.html b/index.html index 0a17728a4..871f116fc 100644 --- a/index.html +++ b/index.html @@ -1000,10 +986,15 @@

    sbi: simulation-based inference

    sbi: A Python toolbox for simulation-based inference.

    using sbi

    -

    Inference can be run in a single line of code:

    +

    Inference can be run in a single line of code

    posterior = infer(simulator, prior, method='SNPE', num_simulations=1000)
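A rough end-to-end sketch of how this one-liner is typically used (the Gaussian-noise toy simulator, the uniform prior, and the observation x_o below are illustrative assumptions, not part of this page):

import torch
from sbi.inference import infer
from sbi.utils import BoxUniform

# Illustrative 3-dimensional uniform prior (assumed for this sketch).
prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Toy simulator (assumed): observations are the parameters plus Gaussian noise.
def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)

posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)

# Draw posterior samples for a hypothetical observation x_o.
x_o = torch.zeros(3)
samples = posterior.sample((1000,), x=x_o)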
     
    -

    and you can choose from a variety of amortized and sequential SBI methods.

    +

    or in a few lines for more flexibility:

    +
    inference = SNPE(prior=prior)
    +_ = inference.append_simulations(theta, x).train()
    +posterior = inference.build_posterior()
    +
    +
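Once built this way, the posterior is used exactly like the one returned by infer. A short usage sketch, assuming theta and x were simulated beforehand (e.g. with simulate_for_sbi) and x_o is an observation from your data:

# Sample and evaluate the posterior at the observation x_o (assumed given).
samples = posterior.sample((10000,), x=x_o)
log_probability = posterior.log_prob(samples, x=x_o)

# Optional: visualize the posterior samples.
from sbi.analysis import pairplot
pairplot(samples)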

    sbi lets you choose from a variety of amortized and sequential SBI methods:

Amortized methods return a posterior that can be applied to many different observations without retraining, whereas sequential methods focus the inference on one particular observation to be more simulation-efficient. For an overview of implemented methods, see below or check out our GitHub page.
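To make this distinction concrete, the sketch below reuses the simulator, prior, and posterior assumed above (with two hypothetical observations x_o_1 and x_o_2) and contrasts amortized reuse with the multi-round pattern from the sbi tutorials:

# Amortized: one trained posterior, many observations, no retraining.
samples_1 = posterior.sample((1000,), x=x_o_1)
samples_2 = posterior.sample((1000,), x=x_o_2)

# Sequential (multi-round SNPE): focus simulations on a single observation x_o.
from sbi.inference import SNPE, prepare_for_sbi, simulate_for_sbi

simulator, prior = prepare_for_sbi(simulator, prior)
inference = SNPE(prior=prior)
proposal = prior
for _ in range(2):
    theta, x = simulate_for_sbi(simulator, proposal, num_simulations=500)
    density_estimator = inference.append_simulations(theta, x, proposal=proposal).train()
    posterior = inference.build_posterior(density_estimator)
    proposal = posterior.set_default_x(x_o)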

    diff --git a/install/index.html b/install/index.html index 07a4b9903..d779b7f25 100644 --- a/install/index.html +++ b/install/index.html @@ -319,20 +319,6 @@ -
diff --git a/reference/index.html b/reference/index.html index d368e03aa..bf5e20113 100644 --- a/reference/index.html +++ b/reference/index.html @@ -309,20 +309,6 @@
  • diff --git a/search/search_index.json b/search/search_index.json index 0cb0952cb..16afa2e45 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"sbi : simulation-based inference \u00b6 sbi : A Python toolbox for simulation-based inference. Inference can be run in a single line of code: posterior = infer ( simulator , prior , method = 'SNPE' , num_simulations = 1000 ) and you can choose from a variety of amortized and sequential SBI methods. Amortized methods return a posterior that can be applied to many different observations without retraining, whereas sequential methods focus the inference on one particular observation to be more simulation-efficient. For an overview of implemented methods see below, or checkout or GitHub page . Overview \u00b6 To learn about the general motivation behind simulation-based inference, and the inference methods included in sbi , read on below. For example applications to canonical problems in neuroscience, browse the recent research article Training deep neural density estimators to identify mechanistic models of neural dynamics . If you want to get started using sbi on your own problem, jump to installation and then check out the tutorial . Motivation and approach \u00b6 Many areas of science and engineering make extensive use of complex, stochastic, numerical simulations to describe the structure and dynamics of the processes being investigated. A key challenge in simulation-based science is constraining these simulation models\u2019 parameters, which are intepretable quantities, with observational data. Bayesian inference provides a general and powerful framework to invert the simulators, i.e. describe the parameters which are consistent both with empirical data and prior knowledge. In the case of simulators, a key quantity required for statistical inference, the likelihood of observed data given parameters, \\(\\mathcal{L}(\\theta) = p(x_o|\\theta)\\) , is typically intractable, rendering conventional statistical approaches inapplicable. sbi implements powerful machine-learning methods that address this problem. Roughly, these algorithms can be categorized as: Neural Posterior Estimation (amortized NPE and sequential SNPE ), Neural Likelihood Estimation ( (S)NLE ), and Neural Ratio Estimation ( (S)NRE ). Depending on the characteristics of the problem, e.g. the dimensionalities of the parameter space and the observation space, one of the methods will be more suitable. Goal: Algorithmically identify mechanistic models which are consistent with data. Each of the methods above needs three inputs: A candidate mechanistic model, prior knowledge or constraints on model parameters, and observational data (or summary statistics thereof). The methods then proceed by sampling parameters from the prior followed by simulating synthetic data from these parameters, learning the (probabilistic) association between data (or data features) and underlying parameters, i.e. to learn statistical inference from simulated data. The way in which this association is learned differs between the above methods, but all use deep neural networks. This learned neural network is then applied to empirical data to derive the full space of parameters consistent with the data and the prior, i.e. the posterior distribution. 
High posterior probability is assigned to parameters which are consistent with both the data and the prior, low probability to inconsistent parameters. While SNPE directly learns the posterior distribution, SNLE and SNRE need an extra MCMC sampling step to construct a posterior. If needed, an initial estimate of the posterior can be used to adaptively generate additional informative simulations. Publications \u00b6 See Cranmer, Brehmer, Louppe (2020) for a recent review on simulation-based inference. The following papers offer additional details on the inference methods implemented in sbi . You can find a tutorial on how to run each of these methods here . Posterior estimation ( (S)NPE ) \u00b6 Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] Flexible statistical inference for mechanistic models of neural dynamics by Lueckmann, Goncalves, Bassetto, \u00d6cal, Nonnenmacher & Macke (NeurIPS 2017) [PDF] [BibTeX] Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] [BibTeX] Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] Likelihood-estimation ( (S)NLE ) \u00b6 Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] Likelihood-ratio-estimation ( (S)NRE ) \u00b6 Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] Utilities \u00b6 Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke [Paper] and in Rozet, Louppe [Paper]","title":"Home"},{"location":"#sbi-simulation-based-inference","text":"sbi : A Python toolbox for simulation-based inference. Inference can be run in a single line of code: posterior = infer ( simulator , prior , method = 'SNPE' , num_simulations = 1000 ) and you can choose from a variety of amortized and sequential SBI methods. Amortized methods return a posterior that can be applied to many different observations without retraining, whereas sequential methods focus the inference on one particular observation to be more simulation-efficient. For an overview of implemented methods see below, or checkout or GitHub page .","title":"sbi: simulation-based inference"},{"location":"#overview","text":"To learn about the general motivation behind simulation-based inference, and the inference methods included in sbi , read on below. 
For example applications to canonical problems in neuroscience, browse the recent research article Training deep neural density estimators to identify mechanistic models of neural dynamics . If you want to get started using sbi on your own problem, jump to installation and then check out the tutorial .","title":"Overview"},{"location":"#motivation-and-approach","text":"Many areas of science and engineering make extensive use of complex, stochastic, numerical simulations to describe the structure and dynamics of the processes being investigated. A key challenge in simulation-based science is constraining these simulation models\u2019 parameters, which are intepretable quantities, with observational data. Bayesian inference provides a general and powerful framework to invert the simulators, i.e. describe the parameters which are consistent both with empirical data and prior knowledge. In the case of simulators, a key quantity required for statistical inference, the likelihood of observed data given parameters, \\(\\mathcal{L}(\\theta) = p(x_o|\\theta)\\) , is typically intractable, rendering conventional statistical approaches inapplicable. sbi implements powerful machine-learning methods that address this problem. Roughly, these algorithms can be categorized as: Neural Posterior Estimation (amortized NPE and sequential SNPE ), Neural Likelihood Estimation ( (S)NLE ), and Neural Ratio Estimation ( (S)NRE ). Depending on the characteristics of the problem, e.g. the dimensionalities of the parameter space and the observation space, one of the methods will be more suitable. Goal: Algorithmically identify mechanistic models which are consistent with data. Each of the methods above needs three inputs: A candidate mechanistic model, prior knowledge or constraints on model parameters, and observational data (or summary statistics thereof). The methods then proceed by sampling parameters from the prior followed by simulating synthetic data from these parameters, learning the (probabilistic) association between data (or data features) and underlying parameters, i.e. to learn statistical inference from simulated data. The way in which this association is learned differs between the above methods, but all use deep neural networks. This learned neural network is then applied to empirical data to derive the full space of parameters consistent with the data and the prior, i.e. the posterior distribution. High posterior probability is assigned to parameters which are consistent with both the data and the prior, low probability to inconsistent parameters. While SNPE directly learns the posterior distribution, SNLE and SNRE need an extra MCMC sampling step to construct a posterior. If needed, an initial estimate of the posterior can be used to adaptively generate additional informative simulations.","title":"Motivation and approach"},{"location":"#publications","text":"See Cranmer, Brehmer, Louppe (2020) for a recent review on simulation-based inference. The following papers offer additional details on the inference methods implemented in sbi . 
You can find a tutorial on how to run each of these methods here .","title":"Publications"},{"location":"#posterior-estimation-snpe","text":"Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] Flexible statistical inference for mechanistic models of neural dynamics by Lueckmann, Goncalves, Bassetto, \u00d6cal, Nonnenmacher & Macke (NeurIPS 2017) [PDF] [BibTeX] Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] [BibTeX] Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper]","title":"Posterior estimation ((S)NPE)"},{"location":"#likelihood-estimation-snle","text":"Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper]","title":"Likelihood-estimation ((S)NLE)"},{"location":"#likelihood-ratio-estimation-snre","text":"Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF]","title":"Likelihood-ratio-estimation ((S)NRE)"},{"location":"#utilities","text":"Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke [Paper] and in Rozet, Louppe [Paper]","title":"Utilities"},{"location":"citation/","text":"Citation \u00b6 If you use sbi consider citing the sbi software paper , in addition to the original research articles describing the specific sbi-algorithm(s) you are using. @article { tejero-cantero2020sbi, doi = { 10.21105/joss.02505 } , url = { https://doi.org/10.21105/joss.02505 } , year = { 2020 } , publisher = { The Open Journal } , volume = { 5 } , number = { 52 } , pages = { 2505 } , author = { Alvaro Tejero-Cantero and Jan Boelts and Michael Deistler and Jan-Matthis Lueckmann and Conor Durkan and Pedro J. Gon\u00e7alves and David S. Greenberg and Jakob H. Macke } , title = { sbi: A toolkit for simulation-based inference } , journal = { Journal of Open Source Software } } The above citation refers to the original version of the sbi project and has a persistent DOI. Additionally, new releases of sbi are citable via Zenodo , where we create a new DOI for every release.","title":"Citation"},{"location":"citation/#citation","text":"If you use sbi consider citing the sbi software paper , in addition to the original research articles describing the specific sbi-algorithm(s) you are using. 
@article { tejero-cantero2020sbi, doi = { 10.21105/joss.02505 } , url = { https://doi.org/10.21105/joss.02505 } , year = { 2020 } , publisher = { The Open Journal } , volume = { 5 } , number = { 52 } , pages = { 2505 } , author = { Alvaro Tejero-Cantero and Jan Boelts and Michael Deistler and Jan-Matthis Lueckmann and Conor Durkan and Pedro J. Gon\u00e7alves and David S. Greenberg and Jakob H. Macke } , title = { sbi: A toolkit for simulation-based inference } , journal = { Journal of Open Source Software } } The above citation refers to the original version of the sbi project and has a persistent DOI. Additionally, new releases of sbi are citable via Zenodo , where we create a new DOI for every release.","title":"Citation"},{"location":"code_of_conduct/","text":"Contributor Covenant Code of Conduct \u00b6 Our Pledge \u00b6 We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. Our Standards \u00b6 Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Enforcement Responsibilities \u00b6 Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. Scope \u00b6 This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Enforcement \u00b6 Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting sbi developer Jan Boelts via email ( jan.boelts@uni-tuebingen.de ). All complaints will be reviewed and investigated promptly and fairly. 
All community leaders are obligated to respect the privacy and security of the reporter of any incident. Enforcement Guidelines \u00b6 Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 1. Correction \u00b6 Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 2. Warning \u00b6 Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 3. Temporary Ban \u00b6 Community Impact : A serious violation of community standards, including sustained inappropriate behavior. Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 4. Permanent Ban \u00b6 Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community. Attribution \u00b6 This Code of Conduct is adapted from the Contributor Covenant , version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Code of Conduct"},{"location":"code_of_conduct/#contributor-covenant-code-of-conduct","text":"","title":"Contributor Covenant Code of Conduct"},{"location":"code_of_conduct/#our-pledge","text":"We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. 
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.","title":"Our Pledge"},{"location":"code_of_conduct/#our-standards","text":"Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting","title":"Our Standards"},{"location":"code_of_conduct/#enforcement-responsibilities","text":"Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.","title":"Enforcement Responsibilities"},{"location":"code_of_conduct/#scope","text":"This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.","title":"Scope"},{"location":"code_of_conduct/#enforcement","text":"Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting sbi developer Jan Boelts via email ( jan.boelts@uni-tuebingen.de ). All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident.","title":"Enforcement"},{"location":"code_of_conduct/#enforcement-guidelines","text":"Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:","title":"Enforcement Guidelines"},{"location":"code_of_conduct/#1-correction","text":"Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.","title":"1. Correction"},{"location":"code_of_conduct/#2-warning","text":"Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. 
No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.","title":"2. Warning"},{"location":"code_of_conduct/#3-temporary-ban","text":"Community Impact : A serious violation of community standards, including sustained inappropriate behavior. Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.","title":"3. Temporary Ban"},{"location":"code_of_conduct/#4-permanent-ban","text":"Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community.","title":"4. Permanent Ban"},{"location":"code_of_conduct/#attribution","text":"This Code of Conduct is adapted from the Contributor Covenant , version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Attribution"},{"location":"contribute/","text":"User experiences, bugs, and feature requests \u00b6 If you are using sbi to infer the parameters of a simulator, we would be delighted to know how it worked for you. If it didn\u2019t work according to plan, please open up an issue and tell us more about your use case: the dimensionality of the input parameters and of the output, as well as the setup you used to run inference (i.e. number of simulations, number of rounds,\u2026). To report bugs and suggest features (including better documentation), please equally head over to issues on GitHub . Code contributions \u00b6 Contributions to the sbi package are welcome! In general, we use pull requests to make changes to sbi . So, if you are planning to make a contribution, please fork, create a feature branch and then make a PR from your feature branch to the upstream sbi ( details ). To give credits to contributors, we consider adding contributors who repeatedly and substantially contributed to sbi to the list of authors of the package at the end of every year. Additionally, we mention all contributors in the releases. Development environment \u00b6 Clone the repo and install all the dependencies using the environment.yml file to create a conda environment: conda env create -f environment.yml . If you already have an sbi environment and want to refresh dependencies, just run conda env update -f environment.yml --prune . Alternatively, you can install via setup.py using pip install -e \".[dev]\" (the dev flag installs development and testing dependencies). Style conventions \u00b6 For docstrings and comments, we use Google Style . 
Code needs to pass through the following tools, which are installed alongside sbi : black : Automatic code formatting for Python. You can run black manually from the console using black . in the top directory of the repository, which will format all files. isort : Used to consistently order imports. You can run isort manually from the console using isort in the top directory. pyright : Used for static type checking. black and isort and pyright are checked as part of our CI actions. If these checks fail please make sure you have installed the latest versions for each of them and run them locally. Online documentation \u00b6 Most of the documentation is written in markdown ( basic markdown guide ). You can directly fix mistakes and suggest clearer formulations in markdown files simply by initiating a PR on through GitHub. Click on documentation file and look for the little pencil at top right.","title":"Guide"},{"location":"contribute/#user-experiences-bugs-and-feature-requests","text":"If you are using sbi to infer the parameters of a simulator, we would be delighted to know how it worked for you. If it didn\u2019t work according to plan, please open up an issue and tell us more about your use case: the dimensionality of the input parameters and of the output, as well as the setup you used to run inference (i.e. number of simulations, number of rounds,\u2026). To report bugs and suggest features (including better documentation), please equally head over to issues on GitHub .","title":"User experiences, bugs, and feature requests"},{"location":"contribute/#code-contributions","text":"Contributions to the sbi package are welcome! In general, we use pull requests to make changes to sbi . So, if you are planning to make a contribution, please fork, create a feature branch and then make a PR from your feature branch to the upstream sbi ( details ). To give credits to contributors, we consider adding contributors who repeatedly and substantially contributed to sbi to the list of authors of the package at the end of every year. Additionally, we mention all contributors in the releases.","title":"Code contributions"},{"location":"contribute/#development-environment","text":"Clone the repo and install all the dependencies using the environment.yml file to create a conda environment: conda env create -f environment.yml . If you already have an sbi environment and want to refresh dependencies, just run conda env update -f environment.yml --prune . Alternatively, you can install via setup.py using pip install -e \".[dev]\" (the dev flag installs development and testing dependencies).","title":"Development environment"},{"location":"contribute/#style-conventions","text":"For docstrings and comments, we use Google Style . Code needs to pass through the following tools, which are installed alongside sbi : black : Automatic code formatting for Python. You can run black manually from the console using black . in the top directory of the repository, which will format all files. isort : Used to consistently order imports. You can run isort manually from the console using isort in the top directory. pyright : Used for static type checking. black and isort and pyright are checked as part of our CI actions. If these checks fail please make sure you have installed the latest versions for each of them and run them locally.","title":"Style conventions"},{"location":"contribute/#online-documentation","text":"Most of the documentation is written in markdown ( basic markdown guide ). 
You can directly fix mistakes and suggest clearer formulations in markdown files simply by initiating a PR on through GitHub. Click on documentation file and look for the little pencil at top right.","title":"Online documentation"},{"location":"credits/","text":"Credits \u00b6 License \u00b6 sbi is licensed under the Affero General Public License version 3 (AGPLv3) and Copyright (C) 2020 \u00c1lvaro Tejero-Cantero, Jakob H. Macke, Jan-Matthis L\u00fcckmann, Michael Deistler, Jan F. B\u00f6lts. Copyright (C) 2020 Conor M. Durkan. Support \u00b6 sbi has been supported by the German Federal Ministry of Education and Research (BMBF) through the project ADIMEM, FKZ 01IS18052 A-D). ADIMEM is a collaborative project between the groups of Jakob Macke (Uni T\u00fcbingen), Philipp Berens (Uni T\u00fcbingen), Philipp Hennig (Uni T\u00fcbingen) and Marcel Oberlaender (caesar Bonn) which aims to develop inference methods for mechanistic models. Important dependencies and prior art \u00b6 sbi is the successor to delfi , a Theano-based toolbox for sequential neural posterior estimation developed at mackelab . If you were using delfi , we strongly recommend to move your inference over to sbi . Please open issues if you find unexpected behaviour or missing features. We will consider these bugs and give them priority. sbi as a PyTorch-based toolbox started as a fork of conormdurkan/lfi , by Conor M.Durkan . sbi uses density estimators from bayesiains/nflows by Conor M.Durkan , George Papamakarios and Artur Bekasov . These are proxied through pyknos , a package focused on density estimation. sbi uses PyTorch and tries to align with the interfaces (e.g. for probability distributions) adopted by PyTorch . See README.md for a list of publications describing the methods implemented in sbi .","title":"Credits"},{"location":"credits/#credits","text":"","title":"Credits"},{"location":"credits/#license","text":"sbi is licensed under the Affero General Public License version 3 (AGPLv3) and Copyright (C) 2020 \u00c1lvaro Tejero-Cantero, Jakob H. Macke, Jan-Matthis L\u00fcckmann, Michael Deistler, Jan F. B\u00f6lts. Copyright (C) 2020 Conor M. Durkan.","title":"License"},{"location":"credits/#support","text":"sbi has been supported by the German Federal Ministry of Education and Research (BMBF) through the project ADIMEM, FKZ 01IS18052 A-D). ADIMEM is a collaborative project between the groups of Jakob Macke (Uni T\u00fcbingen), Philipp Berens (Uni T\u00fcbingen), Philipp Hennig (Uni T\u00fcbingen) and Marcel Oberlaender (caesar Bonn) which aims to develop inference methods for mechanistic models.","title":"Support"},{"location":"credits/#important-dependencies-and-prior-art","text":"sbi is the successor to delfi , a Theano-based toolbox for sequential neural posterior estimation developed at mackelab . If you were using delfi , we strongly recommend to move your inference over to sbi . Please open issues if you find unexpected behaviour or missing features. We will consider these bugs and give them priority. sbi as a PyTorch-based toolbox started as a fork of conormdurkan/lfi , by Conor M.Durkan . sbi uses density estimators from bayesiains/nflows by Conor M.Durkan , George Papamakarios and Artur Bekasov . These are proxied through pyknos , a package focused on density estimation. sbi uses PyTorch and tries to align with the interfaces (e.g. for probability distributions) adopted by PyTorch . 
See README.md for a list of publications describing the methods implemented in sbi .","title":"Important dependencies and prior art"},{"location":"faq/","text":"Frequently asked questions \u00b6 Can the algorithms deal with invalid data, e.g. NaN or inf? What should I do when my \u2018posterior samples are outside of the prior support\u2019 in SNPE? When using multiple workers, I get a pickling error. Can I still use multiprocessing? Can I use the GPU for training the density estimator? How should I save and load objects in sbi ? Can I stop neural network training and resume it later? How can I use a prior that is not defined in PyTorch?","title":"FAQ"},{"location":"faq/#frequently-asked-questions","text":"Can the algorithms deal with invalid data, e.g. NaN or inf? What should I do when my \u2018posterior samples are outside of the prior support\u2019 in SNPE? When using multiple workers, I get a pickling error. Can I still use multiprocessing? Can I use the GPU for training the density estimator? How should I save and load objects in sbi ? Can I stop neural network training and resume it later? How can I use a prior that is not defined in PyTorch?","title":"Frequently asked questions"},{"location":"install/","text":"Installation \u00b6 sbi requires Python 3.6 or higher. We recommend to use a conda virtual environment ( Miniconda installation instructions ). If conda is installed on the system, an environment for installing sbi can be created as follows: # Create an environment for sbi (indicate Python 3.6 or higher); activate it $ conda create -n sbi_env python=3.7 && conda activate sbi_env Independent of whether you are using conda or not, sbi can be installed using pip : $ pip install sbi To test the installation, drop into a python prompt and run from sbi.examples.minimal import simple posterior = simple () print ( posterior )","title":"Installation"},{"location":"install/#installation","text":"sbi requires Python 3.6 or higher. We recommend to use a conda virtual environment ( Miniconda installation instructions ). If conda is installed on the system, an environment for installing sbi can be created as follows: # Create an environment for sbi (indicate Python 3.6 or higher); activate it $ conda create -n sbi_env python=3.7 && conda activate sbi_env Independent of whether you are using conda or not, sbi can be installed using pip : $ pip install sbi To test the installation, drop into a python prompt and run from sbi.examples.minimal import simple posterior = simple () print ( posterior )","title":"Installation"},{"location":"reference/","text":"API Reference \u00b6 Inference \u00b6 sbi . inference . base . infer ( simulator , prior , method , num_simulations , num_workers = 1 ) \u00b6 Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior \\(p(\\theta|x)\\) can be sampled and evaluated for any \\(x\\) (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. 
function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required method str What inference method to use. Either of SNPE, SNLE or SNRE. required num_simulations int Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. required num_workers int Number of parallel workers to use for simulations. 1 Returns: Posterior over parameters conditional on observations (amortized). Source code in sbi/inference/base.py def infer ( simulator : Callable , prior : Distribution , method : str , num_simulations : int , num_workers : int = 1 , ) -> NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior sbi . utils . user_input_checks . prepare_for_sbi ( simulator , prior ) \u00b6 Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. 
required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches; sampling and evaluating it returns Tensors.
    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior sbi . inference . base . simulate_for_sbi ( simulator , proposal , num_simulations , num_workers = 1 , simulation_batch_size = 1 , seed = None , show_progress_bar = True ) \u00b6 Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. 
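For orientation, a minimal usage sketch of `prepare_for_sbi` and `simulate_for_sbi` (the Gaussian-noise toy simulator and the `BoxUniform` prior below are illustrative assumptions, not part of the API):

```python
import torch

from sbi.inference import prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform


def simulator(theta):
    # Toy simulator: the observation is the parameter plus Gaussian noise.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Reshape and type-cast simulator and prior to meet the requirements listed above.
simulator, prior = prepare_for_sbi(simulator, prior)

# Sample 1000 parameter sets from the prior and simulate them with 4 workers.
theta, x = simulate_for_sbi(simulator, proposal=prior, num_simulations=1000, num_workers=4)
print(theta.shape, x.shape)  # expected: torch.Size([1000, 3]) torch.Size([1000, 3])
```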
Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x sbi.inference.snpe.snpe_a.SNPE_A ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'mdn_snpe_a' , num_components = 10 , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-A [1]. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. None density_estimator Union[str, Callable] If it is a string (only \u201cmdn_snpe_a\u201d is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated num_components times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. 'mdn_snpe_a' num_components int Number of components of the mixture of Gaussians in the last round. This overrides the num_components value passed to posterior_nn() . 10 device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"mdn_snpe_a\" , num_components : int = 10 , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-A [1]. 
[1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. density_estimator: If it is a string (only \"mdn_snpe_a\" is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated `num_components` times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. num_components: Number of components of the mixture of Gaussians in the last round. This overrides the `num_components` value passed to `posterior_nn()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" # Catch invalid inputs. if not (( density_estimator == \"mdn_snpe_a\" ) or callable ( density_estimator )): raise TypeError ( \"The `density_estimator` passed to SNPE_A needs to be a \" \"callable or the string 'mdn_snpe_a'!\" ) # `num_components` will be used to replicate the Gaussian in the last round. self . _num_components = num_components self . _ran_final_round = False # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_components\" ), ) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_a.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . 
_device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . _device ) ) return self build_posterior ( self , density_estimator = None , prior = None ) \u00b6 Build posterior from the neural density estimator. This method first corrects the estimated density with correct_for_proposal and then returns a DirectPosterior . Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None Returns: Type Description DirectPosterior Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def build_posterior ( self , density_estimator : Optional [ TorchModule ] = None , prior : Optional [ Distribution ] = None , ) -> \"DirectPosterior\" : r \"\"\"Build posterior from the neural density estimator. This method first corrects the estimated density with `correct_for_proposal` and then returns a `DirectPosterior`. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNPE_A(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior wrapped_density_estimator = self . correct_for_proposal ( density_estimator = density_estimator ) self . _posterior = DirectPosterior ( posterior_estimator = wrapped_density_estimator , # type: ignore prior = prior , ) return deepcopy ( self . _posterior ) correct_for_proposal ( self , density_estimator = None ) \u00b6 Build mixture of Gaussians that approximates the posterior. 
Returns a SNPE_A_MDN object, which applies the posthoc-correction required in SNPE-A. Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None Returns: Type Description SNPE_A_MDN Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def correct_for_proposal ( self , density_estimator : Optional [ TorchModule ] = None , ) -> \"SNPE_A_MDN\" : r \"\"\"Build mixture of Gaussians that approximates the posterior. Returns a `SNPE_A_MDN` object, which applies the posthoc-correction required in SNPE-A. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if density_estimator is None : density_estimator = deepcopy ( self . _neural_net ) # PosteriorEstimator.train() also returns a deepcopy, mimic this here # If internal net is used device is defined. device = self . _device else : # Otherwise, infer it from the device of the net parameters. device = str ( next ( density_estimator . parameters ()) . device ) # Set proposal of the density estimator. # This also evokes the z-scoring correction if necessary. if ( self . _proposal_roundwise [ - 1 ] is self . _prior or self . _proposal_roundwise [ - 1 ] is None ): proposal = self . _prior assert isinstance ( proposal , ( MultivariateNormal , utils . BoxUniform ) ), \"\"\"Prior must be `torch.distributions.MultivariateNormal` or `sbi.utils. BoxUniform`\"\"\" else : assert isinstance ( self . _proposal_roundwise [ - 1 ], DirectPosterior ), \"\"\"The proposal you passed to `append_simulations` is neither the prior nor a `DirectPosterior`. SNPE-A currently only supports these scenarios. \"\"\" proposal = self . _proposal_roundwise [ - 1 ] # Create the SNPE_A_MDN wrapped_density_estimator = SNPE_A_MDN ( flow = density_estimator , # type: ignore proposal = proposal , prior = self . _prior , device = device , ) return wrapped_density_estimator get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. 
resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . 
_data_round_index , starting_round ) return theta , x , prior_masks train ( self , final_round = False , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , component_perturbation = 0.005 ) \u00b6 Return density estimator that approximates the proposal posterior. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Parameters: Name Type Description Default final_round bool Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. False training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. required retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None component_perturbation float The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. 0.005 Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
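To make the two training regimes of SNPE-A concrete, here is a hedged two-round sketch (the toy simulator, prior, and observation `x_o` are assumptions for illustration, not part of the API):

```python
import torch

from sbi.inference import SNPE_A, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform


def simulator(theta):
    # Toy simulator used only for illustration.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
simulator, prior = prepare_for_sbi(simulator, prior)
x_o = torch.zeros(1, 2)  # assumed placeholder observation

inference = SNPE_A(prior=prior, num_components=5)

# Round 1: Algorithm 1 from [1], a single-component Gaussian trained on prior samples.
theta, x = simulate_for_sbi(simulator, prior, num_simulations=500)
inference.append_simulations(theta, x).train()
proposal = inference.build_posterior().set_default_x(x_o)

# Round 2: Algorithm 2 from [1], expand to the mixture and apply the post-hoc correction.
theta, x = simulate_for_sbi(simulator, proposal, num_simulations=500)
inference.append_simulations(theta, x, proposal=proposal).train(final_round=True)
posterior = inference.build_posterior().set_default_x(x_o)
samples = posterior.sample((1000,))
```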
Source code in sbi/inference/snpe/snpe_a.py def train ( self , final_round : bool = False , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , component_perturbation : float = 5e-3 , ) -> nn . Module : r \"\"\"Return density estimator that approximates the proposal posterior. [1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Args: final_round: Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) component_perturbation: The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" assert not retrain_from_scratch , \"\"\"Retraining from scratch is not supported in SNPE-A yet. The reason for this is that, if we reininitialized the density estimator, the z-scoring would change, which would break the posthoc correction. This is a pure implementation issue.\"\"\" kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"final_round\" , \"component_perturbation\" , ), ) # SNPE-A always discards the prior samples. 
kwargs [ \"discard_prior_samples\" ] = True kwargs [ \"force_first_round_loss\" ] = True self . _round = max ( self . _data_round_index ) if final_round : # If there is (will be) only one round, train with Algorithm 2 from [1]. if self . _round == 0 : self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Run Algorithm 2 from [1]. elif not self . _ran_final_round : # Now switch to the specified number of components. This method will # only be used if `retrain_from_scratch=True`. Otherwise, # the MDN will be built from replicating the single-component net for # `num_component` times (via `_expand_mog()`). self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Extend the MDN to the originally desired number of components. self . _expand_mog ( eps = component_perturbation ) else : warnings . warn ( \"You have already run SNPE-A with `final_round=True`. Running it\" \"again with this setting will not allow computing the posthoc\" \"correction applied in SNPE-A. Thus, you will get an error when \" \"calling `.build_posterior()` after training.\" , UserWarning , ) else : # Run Algorithm 1 from [1]. # Wrap the function that builds the MDN such that we can make # sure that there is only one component when running. self . _build_neural_net = partial ( self . _build_neural_net , num_components = 1 ) if final_round : self . _ran_final_round = True return super () . train ( ** kwargs ) sbi.inference.snpe.snpe_c.SNPE_C ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-C / APT [1]. [1] Automatic Posterior Transformation for Likelihood-free Inference , Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488 . This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled: - proposal is a DirectPosterior with density_estimator mdn , as built with utils.sbi.posterior_nn() . - the density estimator is a mdn , as built with utils.sbi.posterior_nn() . - isinstance(prior, MultivariateNormal) (from torch.distributions ) or isinstance(prior, sbi.utils.BoxUniform) Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-C / APT [1]. [1] _Automatic Posterior Transformation for Likelihood-free Inference_, Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488. This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled:
    - the proposal is a `DirectPosterior` with an `mdn` density estimator, as built with `sbi.utils.posterior_nn()`.
    - the density estimator is an `mdn`, as built with `sbi.utils.posterior_nn()`.
    - `isinstance(prior, MultivariateNormal)` (from `torch.distributions`) or `isinstance(prior, sbi.utils.BoxUniform)` Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_c.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
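As an illustration of the non-atomic criteria listed above, the following hedged sketch (the toy simulator and prior are assumptions) combines an `mdn` density estimator, a `BoxUniform` prior, and a `DirectPosterior` proposal, so the second round is expected to use the non-atomic loss:

```python
import torch

from sbi.inference import SNPE_C, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform
from sbi.utils.get_nn_models import posterior_nn


def simulator(theta):
    # Toy simulator used only for illustration.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
simulator, prior = prepare_for_sbi(simulator, prior)

# An `mdn` estimator plus a uniform prior: two of the three criteria above.
inference = SNPE_C(prior=prior, density_estimator=posterior_nn(model="mdn"))

theta, x = simulate_for_sbi(simulator, prior, num_simulations=500)
inference.append_simulations(theta, x).train()

# A `DirectPosterior` proposal completes the criteria; the next round should
# report "Using SNPE-C with non-atomic loss".
proposal = inference.build_posterior().set_default_x(torch.zeros(1, 2))
theta, x = simulate_for_sbi(simulator, proposal, num_simulations=500)
inference.append_simulations(theta, x, proposal=proposal).train()
```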
Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . 
_device ) ) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'rejection' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'rejection' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior or DirectPosterior . By default, DirectPosterior is used. Only if rejection_sampling_parameters contains proposal , a RejectionPosterior is instantiated. {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior, sbi.inference.posteriors.direct_posterior.DirectPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snpe/snpe_c.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"rejection\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior , DirectPosterior ]: r \"\"\"Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. 
Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior` or `DirectPosterior`. By default, `DirectPosterior` is used. Only if `rejection_sampling_parameters` contains `proposal`, a `RejectionPosterior` is instantiated. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert self . _prior is not None , ( \"You did not pass a prior. You have to pass the prior either at \" \"initialization `inference = SNPE(prior)` or to \" \"`.build_posterior(prior=prior)`.\" ) prior = self . _prior else : utils . check_prior ( prior ) if density_estimator is None : posterior_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : posterior_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator = posterior_estimator , prior = prior , x_o = None , ) if sample_with == \"rejection\" : if \"proposal\" in rejection_sampling_parameters . keys (): self . _posterior = RejectionPosterior ( potential_fn = potential_fn , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) else : self . _posterior = DirectPosterior ( posterior_estimator = posterior_estimator , # type: ignore prior = prior , x_shape = self . _x_shape , device = device , ) elif sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). 
None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. 
Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , force_first_round_loss = False , discard_prior_samples = False , use_combined_loss = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return density estimator that approximates the distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss bool If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False use_combined_loss bool Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
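To make the training and `build_posterior` sampling options concrete, here is a hedged single-round sketch (the toy simulator, prior, and `x_o` are assumptions for illustration):

```python
import torch

from sbi.inference import SNPE_C, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform


def simulator(theta):
    # Toy simulator used only for illustration.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
simulator, prior = prepare_for_sbi(simulator, prior)
x_o = torch.zeros(1, 2)  # assumed placeholder observation

inference = SNPE_C(prior=prior)
theta, x = simulate_for_sbi(simulator, prior, num_simulations=500)
inference.append_simulations(theta, x).train(training_batch_size=100)

# Default backend: direct/rejection sampling from the trained flow ...
posterior = inference.build_posterior()
# ... alternatively, e.g., slice-sampling MCMC on the same density estimator.
mcmc_posterior = inference.build_posterior(sample_with="mcmc", mcmc_method="slice_np")

samples = posterior.sample((1000,), x=x_o)
log_prob = posterior.log_prob(samples, x=x_o)
```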
Source code in sbi/inference/snpe/snpe_c.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , force_first_round_loss : bool = False , discard_prior_samples : bool = False , use_combined_loss : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return density estimator that approximates the distribution $p(\\theta|x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. use_combined_loss: Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. self . _num_atoms = num_atoms self . _use_combined_loss = use_combined_loss kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_atoms\" , \"use_combined_loss\" ), ) self . _round = max ( self . _data_round_index ) if self . _round > 0 : # Set the proposal to the last proposal that was passed by the user. For # atomic SNPE, it does not matter what the proposal is. 
For non-atomic # SNPE, we only use the latest data that was passed, i.e. the one from the # last proposal. proposal = self . _proposal_roundwise [ - 1 ] self . use_non_atomic_loss = ( isinstance ( proposal , DirectPosterior ) and isinstance ( proposal . posterior_estimator . _distribution , mdn ) and isinstance ( self . _neural_net . _distribution , mdn ) and check_dist_class ( self . _prior , class_to_check = ( Uniform , MultivariateNormal ) )[ 0 ] ) algorithm = \"non-atomic\" if self . use_non_atomic_loss else \"atomic\" print ( f \"Using SNPE-C with { algorithm } loss\" ) if self . use_non_atomic_loss : # Take care of z-scoring, pre-compute and store prior terms. self . _set_state_for_mog_proposal () return super () . train ( ** kwargs ) sbi.inference.snle.snle_a.SNLE_A ( LikelihoodEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snle/snle_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNLE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNLE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description LikelihoodEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snle/snle_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"LikelihoodEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNLE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNLE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNLE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood \\(p(x|\\theta)\\) . The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snle/snle_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood $p(x|\\theta)$. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNLE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : likelihood_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : likelihood_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator = likelihood_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. 
False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snle/snle_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
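The following sketch contrasts the three `sample_with` back-ends of `build_posterior` documented above for SNLE. It assumes an already trained `SNLE_A` object named `inference` and an observation `x_o` whose shape matches the simulator output; the concrete `mcmc_parameters` values are illustrative, and the `MCMCPosterior`/`VIPosterior` keyword arguments may vary across sbi versions.

```python
# Sketch only: sampling from a trained SNLE likelihood estimator with different back-ends.
import torch

x_o = torch.zeros(1, 3)  # assumed observation; shape must match the simulator output

# MCMC-based posterior (default): unnormalized log-prob, samples via slice sampling.
mcmc_posterior = inference.build_posterior(
    sample_with="mcmc",
    mcmc_method="slice_np",
    mcmc_parameters={"num_chains": 4, "thin": 10},
)
mcmc_samples = mcmc_posterior.sample((1000,), x=x_o)

# Variational posterior: fit a variational distribution to the potential, then sample cheaply.
vi_posterior = inference.build_posterior(sample_with="vi", vi_method="rKL")
vi_posterior = vi_posterior.set_default_x(x_o)
vi_posterior.train()  # fit the variational distribution before sampling
vi_samples = vi_posterior.sample((1000,))

# Rejection sampling with the prior as proposal.
rejection_posterior = inference.build_posterior(sample_with="rejection")
rejection_samples = rejection_posterior.sample((1000,), x=x_o)
```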
Source code in sbi/inference/snle/snle_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) inherited \u00b6 Train the density estimator to learn the distribution \\(p(x|\\theta)\\) . Parameters: Name Type Description Default resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Flow Density estimator that has learned the distribution \\(p(x|\\theta)\\) . Source code in sbi/inference/snle/snle_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> flows . Flow : r \"\"\"Train the density estimator to learn the distribution $p(x|\\theta)$. Args: resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss after the training. 
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that has learned the distribution $p(x|\\theta)$. \"\"\" # Load data from most recent round. self . _round = max ( self . _data_round_index ) # Starting index for the training set (1 = discard round-0 samples). start_idx = int ( discard_prior_samples and self . _round > 0 ) train_loader , val_loader = self . get_dataloaders ( start_idx , training_batch_size , validation_fraction , resume_training , dataloader_kwargs = dataloader_kwargs , ) # First round or if retraining from scratch: # Call the `self._build_neural_net` with the rounds' thetas and xs as # arguments, which will build the neural network # This is passed into NeuralPosterior, to create a neural posterior which # can `sample()` and `log_prob()`. The network is accessible via `.net`. if self . _neural_net is None or retrain_from_scratch : # Get theta,x to initialize NN theta , x , _ = self . get_simulations ( starting_round = start_idx ) # Use only training data for building the neural net (z-scoring transforms) self . _neural_net = self . _build_neural_net ( theta [ self . train_indices ] . to ( \"cpu\" ), x [ self . train_indices ] . to ( \"cpu\" ), ) self . _x_shape = x_shape_from_simulation ( x . to ( \"cpu\" )) del theta , x assert ( len ( self . _x_shape ) < 3 ), \"SNLE cannot handle multi-dimensional simulator output.\" self . _neural_net . to ( self . _device ) if not resume_training : self . optimizer = optim . Adam ( list ( self . _neural_net . parameters ()), lr = learning_rate , ) self . epoch , self . _val_log_prob = 0 , float ( \"-Inf\" ) while self . epoch <= max_num_epochs and not self . _converged ( self . epoch , stop_after_epochs ): # Train for a single epoch. self . _neural_net . train () train_log_probs_sum = 0 for batch in train_loader : self . optimizer . zero_grad () theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. train_losses = self . _loss ( theta = theta_batch , x = x_batch ) train_loss = torch . mean ( train_losses ) train_log_probs_sum -= train_losses . sum () . item () train_loss . backward () if clip_max_norm is not None : clip_grad_norm_ ( self . _neural_net . parameters (), max_norm = clip_max_norm , ) self . optimizer . step () self . epoch += 1 train_log_prob_average = train_log_probs_sum / ( len ( train_loader ) * train_loader . batch_size # type: ignore ) self . _summary [ \"training_log_probs\" ] . append ( train_log_prob_average ) # Calculate validation performance. self . _neural_net . eval () val_log_prob_sum = 0 with torch . no_grad (): for batch in val_loader : theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. val_losses = self . _loss ( theta = theta_batch , x = x_batch ) val_log_prob_sum -= val_losses . sum () . item () # Take mean over all validation samples. self . _val_log_prob = val_log_prob_sum / ( len ( val_loader ) * val_loader . batch_size # type: ignore ) # Log validation log prob for every epoch. self . _summary [ \"validation_log_probs\" ] . append ( self . _val_log_prob ) self . _maybe_show_progress ( self . _show_progress_bars , self . epoch ) self . _report_convergence_at_end ( self . epoch , stop_after_epochs , max_num_epochs ) # Update summary. self . _summary [ \"epochs_trained\" ] . append ( self . epoch ) self . 
_summary [ \"best_validation_log_prob\" ] . append ( self . _best_val_log_prob ) # Update TensorBoard and summary dict. self . _summarize ( round_ = self . _round ) # Update description for progress bar. if show_train_summary : print ( self . _describe_round ( self . _round , self . _summary )) # Avoid keeping the gradients in the resulting network, which can # cause memory leakage when benchmarking. self . _neural_net . zero_grad ( set_to_none = True ) return deepcopy ( self . _neural_net ) sbi.inference.snre.snre_a.SNRE_A ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 AALR[1], here known as SNRE_A. [1] Likelihood-free MCMC with Amortized Approximate Likelihood Ratios , Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"AALR[1], here known as SNRE_A. [1] _Likelihood-free MCMC with Amortized Approximate Likelihood Ratios_, Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 
summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
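As a compact reference, here is an end-to-end sketch (not from the sbi documentation) of the SNRE_A / AALR workflow built from the methods documented above: `append_simulations`, `train`, and `build_posterior` with MCMC sampling. The toy simulator, prior ranges, and hyperparameter values are illustrative assumptions.

```python
# Sketch only: single-round SNRE_A (AALR) on a toy simulator (all values illustrative).
import torch
from sbi.inference import SNRE_A
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

def simulator(theta: torch.Tensor) -> torch.Tensor:
    # Toy simulator: Gaussian noise around the parameters (assumption, not from the docs).
    return theta + 0.1 * torch.randn_like(theta)

theta = prior.sample((2000,))
x = simulator(theta)

inference = SNRE_A(prior=prior, classifier="resnet")
_ = inference.append_simulations(theta, x).train(  # AALR fixes num_atoms=2 internally
    training_batch_size=50,
    stop_after_epochs=20,
)

# The classifier yields an unnormalized posterior; sampling goes through MCMC.
posterior = inference.build_posterior(sample_with="mcmc", mcmc_method="slice_np")
samples = posterior.sample((1000,), x=torch.zeros(1, 2))
```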
Source code in sbi/inference/snre/snre_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , loss_kwargs = {}) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None loss_kwargs Dict[str, Any] Additional or updated kwargs to be passed to the self._loss fn. {} Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , loss_kwargs : Dict [ str , Any ] = {}, ) -> nn . 
Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) loss_kwargs: Additional or updated kwargs to be passed to the self._loss fn. Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" # AALR is defined for `num_atoms=2`. # Proxy to `super().__call__` to ensure right parameter. kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs , num_atoms = 2 ) sbi.inference.snre.snre_b.SNRE_B ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 SRE[1], here known as SNRE_B. [1] On Contrastive Learning for Likelihood-free Inference , Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. 
True Source code in sbi/inference/snre/snre_b.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SRE[1], here known as SNRE_B. [1] _On Contrastive Learning for Likelihood-free Inference_, Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_b.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). 
Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . 
{} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_b.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . 
_posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_b.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . 
tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_b.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. 
False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_b.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs ) sbi.inference.snre.snre_c.SNRE_C ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within sbi . NRE-C: (1) like SNRE_B, features a \u201cmulticlass\u201d loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) , accessed through .potential() within sbi , to be exact at optimum. 
This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data \\(x\\) . Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data \\(x\\) in rounds after the first. [1] Contrastive Neural Ratio Estimation , Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within `sbi`. NRE-C: (1) like SNRE_B, features a \"multiclass\" loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio $p(\\theta,x)/p(\\theta)p(x)$, accessed through `.potential()` within `sbi`, to be exact at optimum. This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data $x$. Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data $x$ in rounds after the first. [1] _Contrastive Neural Ratio Estimation_, Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_c.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_c.py def build_posterior ( self , density_estimator : Optional [ nn . 
Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . 
_posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . 
DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_classes = 5 , gamma = 1.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_classes int Number of theta to classify against, corresponds to \\(K\\) in Contrastive Neural Ratio Estimation . Minimum value is 1. Similar to num_atoms for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters NRE-C \u201csees\u201d is \\(2K-1\\) or 2 * num_classes - 1 divided between two loss terms. 5 gamma float Determines the relative weight of the sum of all \\(K\\) dependently drawn classes against the marginally drawn one. Specifically, \\(p(y=k) :=p_K\\) , \\(p(y=0) := p_0\\) , \\(p_0 = 1 - K p_K\\) , and finally \\(\\gamma := K p_K / p_0\\) . 1.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. 
If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_c.py def train ( self , num_classes : int = 5 , gamma : float = 1.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_classes: Number of theta to classify against, corresponds to $K$ in _Contrastive Neural Ratio Estimation_. Minimum value is 1. Similar to `num_atoms` for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters `NRE-C` \"sees\" is $2K-1$ or `2 * num_classes - 1` divided between two loss terms. gamma: Determines the relative weight of the sum of all $K$ dependently drawn classes against the marginally drawn one. Specifically, $p(y=k) :=p_K$, $p(y=0) := p_0$, $p_0 = 1 - K p_K$, and finally $\\gamma := K p_K / p_0$. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. 
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"num_atoms\" ] = kwargs . pop ( \"num_classes\" ) + 1 kwargs [ \"loss_kwargs\" ] = { \"gamma\" : kwargs . pop ( \"gamma\" )} return super () . train ( ** kwargs ) sbi.inference.snre.bnre.BNRE ( SNRE_A ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations \\((\\theta, x)\\) , which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/bnre.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations $(\\theta, x)$, which can thus be used for shape inference and potentially for z-scoring. 
It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/bnre.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/bnre.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/bnre.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
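A minimal usage sketch (not taken from the sbi documentation itself) of the round-wise storage described above. The toy prior, the simulator and the sample size are invented for illustration; only `BNRE`, `append_simulations` and `get_simulations` are used as documented on this page.

import torch
from torch.distributions import MultivariateNormal
from sbi.inference import BNRE

# Hypothetical toy problem: 2-d Gaussian prior, noisy identity simulator.
prior = MultivariateNormal(torch.zeros(2), torch.eye(2))

def simulate(theta):
    return theta + 0.1 * torch.randn_like(theta)

inference = BNRE(prior=prior)

theta = prior.sample((500,))
x = simulate(theta)

# from_round=0 marks these simulations as coming from the prior; data are stored round-wise.
inference.append_simulations(theta, x, from_round=0)

# Retrieve everything stored from round 0 onwards: parameters, outputs, prior masks.
theta_all, x_all, prior_masks = inference.get_simulations(starting_round=0)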
Source code in sbi/inference/snre/bnre.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , regularization_strength = 100.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default regularization_strength float The multiplicative coefficient applied to the balancing regularizer ( \\(\\lambda\\) ). 100.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . 
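Continuing the sketch above, a hedged example of the `train` / `build_posterior` / `sample` sequence. The regularization strength, the number of epochs, the MCMC settings and the observation `x_o` are illustrative assumptions, not recommended values.

# Train the balanced classifier; regularization_strength is the weight lambda
# of the balancing regularizer documented above (illustrative value).
classifier = inference.train(regularization_strength=100.0, max_num_epochs=100)

# Wrap the trained ratio estimator into a posterior that samples with MCMC.
posterior = inference.build_posterior(
    sample_with="mcmc",
    mcmc_method="slice_np",
    mcmc_parameters={"num_chains": 4, "thin": 5},  # assumed MCMCPosterior kwargs
)

x_o = torch.tensor([[0.8, -0.2]])            # hypothetical observation
samples = posterior.sample((1_000,), x=x_o)  # draws from p(theta | x_o)
log_potential = posterior.potential(samples, x=x_o)  # unnormalized log-posterior

Passing `sample_with="vi"` or `sample_with="rejection"` instead would wrap the same ratio estimator in a `VIPosterior` or `RejectionPosterior`, configured via `vi_parameters` or `rejection_sampling_parameters` respectively.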
Source code in sbi/inference/snre/bnre.py def train ( self , regularization_strength : float = 100.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: regularization_strength: The multiplicative coefficient applied to the balancing regularizer ($\\lambda$). training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"loss_kwargs\" ] = { \"regularization_strength\" : kwargs . pop ( \"regularization_strength\" ) } return super () . train ( ** kwargs ) sbi.inference.abc.mcabc.MCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_simulations , eps = None , quantile = None , lra = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 , kde = False , kde_kwargs = {}, return_summary = False ) special \u00b6 Run MCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_simulations int Number of simulations to run. required eps Optional[float] Acceptance threshold \\(\\epsilon\\) for distance between observed and simulated data. None quantile Optional[float] Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or eps have to be passed. None lra bool Whether to run linear regression adjustment as in Beaumont et al. 
2002 False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} return_summary bool Whether to return the distances and data corresponding to the accepted parameters. False Returns: Type Description theta (if kde False) accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. Source code in sbi/inference/abc/mcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_simulations : int , eps : Optional [ float ] = None , quantile : Optional [ float ] = None , lra : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, return_summary : bool = False , ) -> Union [ Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ], Tensor , KDEWrapper ]: r \"\"\"Run MCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_simulations: Number of simulations to run. eps: Acceptance threshold $\\epsilon$ for distance between observed and simulated data. quantile: Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or `eps` have to be passed. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details return_summary: Whether to return the distances and data corresponding to the accepted parameters. Returns: theta (if kde False): accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. \"\"\" # Exactly one of eps or quantile need to be passed. 
assert ( eps is not None ) ^ ( quantile is not None ), \"Eps or quantile must be passed, but not both.\" # Run SASS and change the simulator and x_o accordingly. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) num_simulations -= num_pilot_simulations pilot_theta = self . prior . sample (( num_pilot_simulations ,)) pilot_x = self . _batched_simulator ( pilot_theta ) sass_transform = self . get_sass_transform ( pilot_theta , pilot_x , sass_expansion_degree ) simulator = lambda theta : sass_transform ( self . _batched_simulator ( theta )) x_o = sass_transform ( x_o ) else : simulator = self . _batched_simulator # Simulate and calculate distances. theta = self . prior . sample (( num_simulations ,)) x = simulator ( theta ) # Infer shape of x to test and set x_o. self . x_shape = x [ 0 ] . unsqueeze ( 0 ) . shape self . x_o = process_x ( x_o , self . x_shape ) distances = self . distance ( self . x_o , x ) # Select based on acceptance threshold epsilon. if eps is not None : is_accepted = distances < eps num_accepted = is_accepted . sum () . item () assert num_accepted > 0 , f \"No parameters accepted, eps= { eps } too small\" theta_accepted = theta [ is_accepted ] distances_accepted = distances [ is_accepted ] x_accepted = x [ is_accepted ] # Select based on quantile on sorted distances. elif quantile is not None : num_top_samples = int ( num_simulations * quantile ) sort_idx = torch . argsort ( distances ) theta_accepted = theta [ sort_idx ][: num_top_samples ] distances_accepted = distances [ sort_idx ][: num_top_samples ] x_accepted = x [ sort_idx ][: num_top_samples ] else : raise ValueError ( \"One of epsilon or quantile has to be passed.\" ) # Maybe adjust theta with LRA. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) final_theta = self . run_lra ( theta_accepted , x_accepted , observation = self . x_o ) else : final_theta = theta_accepted if kde : self . logger . info ( f \"\"\"KDE on { final_theta . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) kde_dist = get_kde ( final_theta , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( theta = final_theta , distances = distances_accepted , x = x_accepted ), ) else : return kde_dist elif return_summary : return final_theta , dict ( distances = distances_accepted , x = x_accepted ) else : return final_theta __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True ) special \u00b6 Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. 
Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/abc/mcabc.py def __init__ ( self , simulator : Callable , prior , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , ): r \"\"\"Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance functions built from passe string. Returns distance_type is callable. Source code in sbi/inference/abc/mcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance functions built from passe string. Returns distance_type is callable. 
\"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/mcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/mcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . 
fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted sbi.inference.abc.smcabc.SMCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_particles , num_initial_pop , num_simulations , epsilon_decay , distance_based_decay = False , ess_min = None , kernel_variance_scale = 1.0 , use_last_pop_samples = True , return_summary = False , kde = False , kde_kwargs = {}, kde_sample_weights = False , lra = False , lra_with_weights = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 ) special \u00b6 Run SMCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_particles int Number of particles in each population. required num_initial_pop int Number of simulations used for initial population. required num_simulations int Total number of possible simulations. required epsilon_decay float Factor with which the acceptance threshold \\(\\epsilon\\) decays. required distance_based_decay bool Whether the \\(\\epsilon\\) decay is constant over populations or calculated from the previous populations distribution of distances. False ess_min Optional[float] Threshold of effective sampling size for resampling weights. Not used when None (default). None kernel_variance_scale float Factor for scaling the perturbation kernel variance. 1.0 use_last_pop_samples bool Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. True lra bool Whether to run linear regression adjustment as in Beaumont et al. 2002 False lra_with_weights bool Whether to run lra as weighted linear regression with SMC weights False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} kde_sample_weights bool Whether perform weighted KDE with SMC weights or on raw particles. False return_summary bool Whether to return a dictionary with all accepted particles, weights, etc. at the end. False Returns: Type Description theta (if kde False) accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. 
Source code in sbi/inference/abc/smcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_particles : int , num_initial_pop : int , num_simulations : int , epsilon_decay : float , distance_based_decay : bool = False , ess_min : Optional [ float ] = None , kernel_variance_scale : float = 1.0 , use_last_pop_samples : bool = True , return_summary : bool = False , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, kde_sample_weights : bool = False , lra : bool = False , lra_with_weights : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , ) -> Union [ Tensor , KDEWrapper , Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ]]: r \"\"\"Run SMCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_particles: Number of particles in each population. num_initial_pop: Number of simulations used for initial population. num_simulations: Total number of possible simulations. epsilon_decay: Factor with which the acceptance threshold $\\epsilon$ decays. distance_based_decay: Whether the $\\epsilon$ decay is constant over populations or calculated from the previous populations distribution of distances. ess_min: Threshold of effective sampling size for resampling weights. Not used when None (default). kernel_variance_scale: Factor for scaling the perturbation kernel variance. use_last_pop_samples: Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 lra_with_weights: Whether to run lra as weighted linear regression with SMC weights sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details kde_sample_weights: Whether perform weighted KDE with SMC weights or on raw particles. return_summary: Whether to return a dictionary with all accepted particles, weights, etc. at the end. Returns: theta (if kde False): accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. \"\"\" pop_idx = 0 self . num_simulations = num_simulations # Pilot run for SASS. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) sass_transform = self . run_sass_set_xo ( num_particles , num_pilot_simulations , x_o , lra , sass_expansion_degree ) # Udpate simulator and xo x_o = sass_transform ( self . x_o ) def sass_simulator ( theta ): self . simulation_counter += theta . shape [ 0 ] return sass_transform ( self . 
_batched_simulator ( theta )) self . _simulate_with_budget = sass_simulator # run initial population particles , epsilon , distances , x = self . _set_xo_and_sample_initial_population ( x_o , num_particles , num_initial_pop ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) self . logger . info ( ( f \"population= { pop_idx } , eps= { epsilon } , ess= { 1.0 } , \" f \"num_sims= { num_initial_pop } \" ) ) all_particles = [ particles ] all_log_weights = [ log_weights ] all_distances = [ distances ] all_epsilons = [ epsilon ] all_x = [ x ] while self . simulation_counter < self . num_simulations : pop_idx += 1 # Decay based on quantile of distances from previous pop. if distance_based_decay : epsilon = self . _get_next_epsilon ( all_distances [ pop_idx - 1 ], epsilon_decay ) # Constant decay. else : epsilon *= epsilon_decay # Get kernel variance from previous pop. self . kernel_variance = self . get_kernel_variance ( all_particles [ pop_idx - 1 ], torch . exp ( all_log_weights [ pop_idx - 1 ]), samples_per_dim = 500 , kernel_variance_scale = kernel_variance_scale , ) particles , log_weights , distances , x = self . _sample_next_population ( particles = all_particles [ pop_idx - 1 ], log_weights = all_log_weights [ pop_idx - 1 ], distances = all_distances [ pop_idx - 1 ], epsilon = epsilon , x = all_x [ pop_idx - 1 ], use_last_pop_samples = use_last_pop_samples , ) # Resample population if effective sampling size is too small. if ess_min is not None : particles , log_weights = self . resample_if_ess_too_small ( particles , log_weights , ess_min , pop_idx ) self . logger . info ( ( f \"population= { pop_idx } done: eps= { epsilon : .6f } ,\" f \" num_sims= { self . simulation_counter } .\" ) ) # collect results all_particles . append ( particles ) all_log_weights . append ( log_weights ) all_distances . append ( distances ) all_epsilons . append ( epsilon ) all_x . append ( x ) # Maybe run LRA and adjust weights. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) adjusted_particles , adjusted_weights = self . run_lra_update_weights ( particles = all_particles [ - 1 ], xs = all_x [ - 1 ], observation = process_x ( x_o ), log_weights = all_log_weights [ - 1 ], lra_with_weights = lra_with_weights , ) final_particles = adjusted_particles else : final_particles = all_particles [ - 1 ] if kde : self . logger . info ( f \"\"\"KDE on { final_particles . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) # Maybe get particles weights from last population for weighted KDE. if kde_sample_weights : kde_kwargs [ \"sample_weights\" ] = all_log_weights [ - 1 ] . exp () kde_dist = get_kde ( final_particles , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return kde_dist if return_summary : return ( final_particles , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return final_particles __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True , kernel = 'gaussian' , algorithm_variant = 'C' ) special \u00b6 Sequential Monte Carlo Approximate Bayesian Computation. 
We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True kernel Optional[str] Perturbation kernel. 'gaussian' algorithm_variant str Indicating the choice of algorithm variant, A, B, or C. 'C' Source code in sbi/inference/abc/smcabc.py def __init__ ( self , simulator : Callable , prior : Distribution , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , kernel : Optional [ str ] = \"gaussian\" , algorithm_variant : str = \"C\" , ): r \"\"\"Sequential Monte Carlo Approximate Bayesian Computation. We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 
show_progress_bars: Whether to show a progressbar during simulation and sampling. kernel: Perturbation kernel. algorithm_variant: Indicating the choice of algorithm variant, A, B, or C. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) kernels = ( \"gaussian\" , \"uniform\" ) assert ( kernel in kernels ), f \"Kernel ' { kernel } ' not supported. Choose one from { kernels } .\" self . kernel = kernel algorithm_variants = ( \"A\" , \"B\" , \"C\" ) assert algorithm_variant in algorithm_variants , ( f \"SMCABC variant ' { algorithm_variant } ' not supported, choose one from\" \" {algorithm_variants} .\" ) self . algorithm_variant = algorithm_variant self . distance_to_x0 = None self . simulation_counter = 0 self . num_simulations = 0 # Define simulator that keeps track of budget. def simulate_with_budget ( theta ): self . simulation_counter += theta . shape [ 0 ] return self . _batched_simulator ( theta ) self . _simulate_with_budget = simulate_with_budget get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance functions built from passe string. Returns distance_type is callable. Source code in sbi/inference/abc/smcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance functions built from passe string. Returns distance_type is callable. \"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_new_kernel ( self , thetas ) \u00b6 Return new kernel distribution for a given set of paramters. Source code in sbi/inference/abc/smcabc.py def get_new_kernel ( self , thetas : Tensor ) -> Distribution : \"\"\"Return new kernel distribution for a given set of paramters.\"\"\" if self . kernel == \"gaussian\" : assert self . kernel_variance . 
ndim == 2 return MultivariateNormal ( loc = thetas , covariance_matrix = self . kernel_variance ) elif self . kernel == \"uniform\" : low = thetas - self . kernel_variance high = thetas + self . kernel_variance # Move batch shape to event shape to get Uniform that is multivariate in # parameter dimension. return Uniform ( low = low , high = high ) . to_event ( 1 ) else : raise ValueError ( f \"Kernel, ' { self . kernel } ' not supported.\" ) get_particle_ranges ( self , particles , weights , samples_per_dim = 100 ) \u00b6 Return range of particles in each parameter dimension. Source code in sbi/inference/abc/smcabc.py def get_particle_ranges ( self , particles : Tensor , weights : Tensor , samples_per_dim : int = 100 ) -> Tensor : \"\"\"Return range of particles in each parameter dimension.\"\"\" # get weighted samples samples = self . sample_from_population_with_weights ( particles , weights , num_samples = samples_per_dim * particles . shape [ 1 ], ) # Variance spans the range of particles for every dimension. particle_ranges = samples . max ( 0 ) . values - samples . min ( 0 ) . values assert particle_ranges . ndim < 2 return particle_ranges get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/smcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform resample_if_ess_too_small ( self , particles , log_weights , ess_min , pop_idx ) \u00b6 Return resampled particles and uniform weights if effectice sampling size is too small. Source code in sbi/inference/abc/smcabc.py def resample_if_ess_too_small ( self , particles : Tensor , log_weights : Tensor , ess_min : float , pop_idx : int , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return resampled particles and uniform weights if effectice sampling size is too small. \"\"\" num_particles = particles . shape [ 0 ] ess = ( 1 / torch . sum ( torch . 
exp ( 2.0 * log_weights ), dim = 0 )) / num_particles # Resampling of weights for low ESS only for Sisson et al. 2007. if ess < ess_min : self . logger . info ( f \"ESS= { ess : .2f } too low, resampling pop { pop_idx } ...\" ) # First resample, then set to uniform weights as in Sisson et al. 2007. particles = self . sample_from_population_with_weights ( particles , torch . exp ( log_weights ), num_samples = num_particles ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) return particles , log_weights run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/smcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted run_lra_update_weights ( self , particles , xs , observation , log_weights , lra_with_weights ) \u00b6 Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. Source code in sbi/inference/abc/smcabc.py def run_lra_update_weights ( self , particles : Tensor , xs : Tensor , observation : Tensor , log_weights : Tensor , lra_with_weights : bool , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. \"\"\" adjusted_particels = self . run_lra ( theta = particles , x = xs , observation = observation , sample_weight = log_weights . exp () if lra_with_weights else None , ) # Update SMC weights with LRA adjusted weights adjusted_log_weights = self . _calculate_new_log_weights ( new_particles = adjusted_particels , old_particles = particles , old_log_weights = log_weights , ) return adjusted_particels , adjusted_log_weights run_sass_set_xo ( self , num_particles , num_pilot_simulations , x_o , lra = False , sass_expansion_degree = 1 ) \u00b6 Return transform for semi-automatic summary statistics. Runs an single round of rejection abc with fixed budget and accepts num_particles simulations to run the regression for sass. Sets self.x_o once the x_shape can be derived from simulations. Source code in sbi/inference/abc/smcabc.py def run_sass_set_xo ( self , num_particles : int , num_pilot_simulations : int , x_o , lra : bool = False , sass_expansion_degree : int = 1 , ) -> Callable : \"\"\"Return transform for semi-automatic summary statistics. Runs an single round of rejection abc with fixed budget and accepts num_particles simulations to run the regression for sass. Sets self.x_o once the x_shape can be derived from simulations. \"\"\" ( pilot_particles , _ , _ , pilot_xs , ) = self . 
_set_xo_and_sample_initial_population ( x_o , num_particles , num_pilot_simulations ) # Adjust with LRA. if lra : pilot_particles = self . run_lra ( pilot_particles , pilot_xs , self . x_o ) sass_transform = self . get_sass_transform ( pilot_particles , pilot_xs , expansion_degree = sass_expansion_degree , sample_weight = None , ) return sass_transform sample_from_population_with_weights ( particles , weights , num_samples = 1 ) staticmethod \u00b6 Return samples from particles sampled with weights. Source code in sbi/inference/abc/smcabc.py @staticmethod def sample_from_population_with_weights ( particles : Tensor , weights : Tensor , num_samples : int = 1 ) -> Tensor : \"\"\"Return samples from particles sampled with weights.\"\"\" # define multinomial with weights as probs multi = Multinomial ( probs = weights ) # sample num samples, with replacement samples = multi . sample ( sample_shape = torch . Size (( num_samples ,))) # get indices of success trials indices = torch . where ( samples )[ 1 ] # return those indices from trace return particles [ indices ] Posteriors \u00b6 sbi.inference.posteriors.direct_posterior.DirectPosterior ( NeuralPosterior ) \u00b6 Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , posterior_estimator , prior , max_sampling_batch_size = 10000 , device = None , x_shape = None , enable_transform = True ) special \u00b6 Parameters: Name Type Description Default prior Distribution Prior distribution with .log_prob() and .sample() . required posterior_estimator Flow The trained neural posterior. required max_sampling_batch_size int Batchsize of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None enable_transform bool Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for theta_transform . True Source code in sbi/inference/posteriors/direct_posterior.py def __init__ ( self , posterior_estimator : flows . Flow , prior : Distribution , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , enable_transform : bool = True , ): \"\"\" Args: prior: Prior distribution with `.log_prob()` and `.sample()`. posterior_estimator: The trained neural posterior. max_sampling_batch_size: Batchsize of samples being drawn from the proposal at every iteration. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. 
x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. enable_transform: Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for `theta_transform`. \"\"\" # Because `DirectPosterior` does not take the `potential_fn` as input, it # builds it itself. The `potential_fn` and `theta_transform` are used only for # obtaining the MAP. check_prior ( prior ) potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator , prior , x_o = None , enable_transform = enable_transform , ) super () . __init__ ( potential_fn = potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . prior = prior self . posterior_estimator = posterior_estimator self . max_sampling_batch_size = max_sampling_batch_size self . _leakage_density_correction_factor = None self . _purpose = \"\"\"It samples the posterior network and rejects samples that lie outside of the prior bounds.\"\"\" leakage_correction ( self , x , num_rejection_samples = 10000 , force_update = False , show_progress_bars = False , rejection_sampling_batch_size = 10000 ) \u00b6 Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever log_prob is called and norm_posterior=True . Here, it is estimated only once for self.default_x and saved for later. We re-evaluate only whenever a new x is passed. Parameters: Name Type Description Default num_rejection_samples int Number of samples used to estimate correction factor. 10000 show_progress_bars bool Whether to show a progress bar during sampling. False rejection_sampling_batch_size int Batch size for rejection sampling. 10000 Returns: Type Description Tensor Saved or newly-estimated correction factor (as a scalar Tensor ). Source code in sbi/inference/posteriors/direct_posterior.py @torch . no_grad () def leakage_correction ( self , x : Tensor , num_rejection_samples : int = 10_000 , force_update : bool = False , show_progress_bars : bool = False , rejection_sampling_batch_size : int = 10_000 , ) -> Tensor : r \"\"\"Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever `log_prob` is called and `norm_posterior=True`. Here, it is estimated only once for `self.default_x` and saved for later. We re-evaluate only whenever a new `x` is passed. Arguments: num_rejection_samples: Number of samples used to estimate correction factor. show_progress_bars: Whether to show a progress bar during sampling. rejection_sampling_batch_size: Batch size for rejection sampling. Returns: Saved or newly-estimated correction factor (as a scalar `Tensor`). \"\"\" def acceptance_at ( x : Tensor ) -> Tensor : return accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . 
prior , theta ), num_samples = num_rejection_samples , show_progress_bars = show_progress_bars , sample_for_correction_factor = True , max_sampling_batch_size = rejection_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, )[ 1 ] # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _leakage_density_correction_factor is None if is_new_x : # Calculate at x; don't save. return acceptance_at ( x ) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . default_x is not None self . _leakage_density_correction_factor = acceptance_at ( self . default_x ) return self . _leakage_density_correction_factor # type: ignore log_prob ( self , theta , x = None , norm_posterior = True , track_gradients = False , leakage_correction_params = None ) \u00b6 Returns the log-probability of the posterior \\(p(\\theta|x)\\) . Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required norm_posterior bool Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here norm_posterior=False . The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. True track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False leakage_correction_params Optional[dict] A dict of keyword arguments to override the default values of leakage_correction() . Possible options are: num_rejection_samples , force_update , show_progress_bars , and rejection_sampling_batch_size . These parameters only have an effect if norm_posterior=True . None Returns: Type Description Tensor (len(\u03b8),) -shaped log posterior probability \\(\\log p(\\theta|x)\\) for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. Source code in sbi/inference/posteriors/direct_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , norm_posterior : bool = True , track_gradients : bool = False , leakage_correction_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of the posterior $p(\\theta|x)$. Args: theta: Parameters $\\theta$. norm_posterior: Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here `norm_posterior=False`. The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. leakage_correction_params: A `dict` of keyword arguments to override the default values of `leakage_correction()`. Possible options are: `num_rejection_samples`, `force_update`, `show_progress_bars`, and `rejection_sampling_batch_size`. These parameters only have an effect if `norm_posterior=True`. 
Returns: `(len(\u03b8),)`-shaped log posterior probability $\\log p(\\theta|x)$ for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. \"\"\" x = self . _x_else_default_x ( x ) # TODO Train exited here, entered after sampling? self . posterior_estimator . eval () theta = ensure_theta_batched ( torch . as_tensor ( theta )) theta_repeated , x_repeated = match_theta_and_x_batch_shapes ( theta , x ) with torch . set_grad_enabled ( track_gradients ): # Evaluate on device, move back to cpu for comparison with prior. unnorm_log_prob = self . posterior_estimator . log_prob ( theta_repeated , context = x_repeated ) # Force probability to be zero outside prior support. in_prior_support = within_support ( self . prior , theta_repeated ) masked_log_prob = torch . where ( in_prior_support , unnorm_log_prob , torch . tensor ( float ( \"-inf\" ), dtype = torch . float32 , device = self . _device ), ) if leakage_correction_params is None : leakage_correction_params = dict () # use defaults log_factor = ( log ( self . leakage_correction ( x = x , ** leakage_correction_params )) if norm_posterior else 0 ) return masked_log_prob - log_factor map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'posterior' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'posterior' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. 
required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/direct_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"posterior\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. 
False Source code in sbi/inference/posteriors/direct_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = 10000 , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Source code in sbi/inference/posteriors/direct_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () x = self . _x_else_default_x ( x ) max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) samples = accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . prior , theta ), num_samples = num_samples , show_progress_bars = show_progress_bars , max_sampling_batch_size = max_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, alternative_method = \"build_posterior(..., sample_with='mcmc')\" , )[ 0 ] return samples set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. 
has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/direct_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior ( NeuralPosterior ) \u00b6 Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows to estimate the posterior log-probability by estimating the normlalization constant with importance sampling. It also allows to perform importance sampling (with .sample() ) and to draw approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() ) default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'sir' , oversampling_factor = 32 , max_sampling_batch_size = 10000 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during but only when calling .map() . None method str Either of [ sir | importance ]. This sets the behavior of the .sample() method. With sir , approximate posterior samples are generated with sampling importance resampling (SIR). With importance , the .sample() method returns a tuple of samples and corresponding importance weights. 'sir' oversampling_factor int Number of proposed samples from which only one is selected based on its importance weight. 32 max_sampling_batch_size int The batch size of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Device on which to sample, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. 
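A construction sketch for ImportanceSamplingPosterior. The likelihood_estimator_based_potential helper and the variables prior, theta, x, and x_o are assumptions carried over from the sampler-interface tutorial and the DirectPosterior sketch above, not part of this class itself.

from sbi.inference import SNLE, ImportanceSamplingPosterior
from sbi.inference import likelihood_estimator_based_potential

inference = SNLE(prior=prior)
likelihood_estimator = inference.append_simulations(theta, x).train()
potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)

posterior = ImportanceSamplingPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,
    method="sir",              # sampling-importance-resampling draws
    oversampling_factor=32,
)
samples = posterior.sample((1000,), x=x_o)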
If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/importance_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"sir\" , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. method: Either of [`sir`|`importance`]. This sets the behavior of the `.sample()` method. With `sir`, approximate posterior samples are generated with sampling importance resampling (SIR). With `importance`, the `.sample()` method returns a tuple of samples and corresponding importance weights. oversampling_factor: Number of proposed samples from which only one is selected based on its importance weight. max_sampling_batch_size: The batch size of samples being drawn from the proposal at every iteration. device: Device on which to sample, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . _normalization_constant = None self . method = method self . oversampling_factor = oversampling_factor self . max_sampling_batch_size = max_sampling_batch_size self . _purpose = ( \"It provides sampling-importance resampling (SIR) to .sample() from the \" \"posterior and can evaluate the _unnormalized_ posterior density with \" \".log_prob().\" ) estimate_normalization_constant ( self , x , num_samples = 10000 , force_update = False ) \u00b6 Returns the normalization constant via importance sampling. Parameters: Name Type Description Default num_samples int Number of importance samples used for the estimate. 10000 force_update bool Whether to re-calculate the normlization constant when x is unchanged and have a cached value. False Source code in sbi/inference/posteriors/importance_posterior.py @torch . no_grad () def estimate_normalization_constant ( self , x : Tensor , num_samples : int = 10_000 , force_update : bool = False ) -> Tensor : \"\"\"Returns the normalization constant via importance sampling. Args: num_samples: Number of importance samples used for the estimate. force_update: Whether to re-calculate the normlization constant when x is unchanged and have a cached value. \"\"\" # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _normalization_constant is None if is_new_x : # Calculate at x; don't save. _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) return torch . mean ( torch . exp ( log_importance_weights )) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . 
default_x is not None _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) self . _normalization_constant = torch . mean ( torch . exp ( log_importance_weights )) return self . _normalization_constant . to ( self . _device ) # type: ignore log_prob ( self , theta , x = None , track_gradients = False , normalization_constant_params = None ) \u00b6 Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False normalization_constant_params Optional[dict] Parameters passed on to estimate_normalization_constant() . None Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/importance_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , normalization_constant_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. normalization_constant_params: Parameters passed on to `estimate_normalization_constant()`. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) self . potential_fn . set_x ( x ) theta = ensure_theta_batched ( torch . as_tensor ( theta )) with torch . set_grad_enabled ( track_gradients ): potential_values = self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) if normalization_constant_params is None : normalization_constant_params = dict () # use defaults normalization_constant = self . estimate_normalization_constant ( x , ** normalization_constant_params ) return ( potential_values - torch . log ( normalization_constant )) . to ( self . _device ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 
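To make the normalization-constant machinery concrete, a short sketch continuing the ImportanceSamplingPosterior from above; the sample sizes are arbitrary choices, not recommendations.

# The normalization constant is estimated by importance sampling against the
# proposal; more samples give a lower-variance estimate of log p(theta|x).
posterior = posterior.set_default_x(x_o)
log_probs = posterior.log_prob(
    samples,
    normalization_constant_params=dict(num_samples=20_000, force_update=True),
)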
0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/importance_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. 
Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/importance_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , oversampling_factor = 32 , max_sampling_batch_size = 10000 , sample_with = None ) \u00b6 Return samples from the approximate posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] description torch.Size([]) x Optional[torch.Tensor] description None Source code in sbi/inference/posteriors/importance_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , ) -> Union [ Tensor , Tuple [ Tensor , Tensor ]]: \"\"\"Return samples from the approximate posterior distribution. Args: sample_shape: _description_ x: _description_ \"\"\" if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) if self . method == \"sir\" : return self . _sir_sample ( sample_shape , oversampling_factor = oversampling_factor , max_sampling_batch_size = max_sampling_batch_size , ) elif self . method == \"importance\" : return self . _importance_sample ( sample_shape ) else : raise NameError set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. 
has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/importance_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.mcmc_posterior.MCMCPosterior ( NeuralPosterior ) \u00b6 Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows to sample from the posterior with MCMC. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. mcmc_method : str property writable \u00b6 Returns MCMC method. posterior_sampler property readonly \u00b6 Returns sampler created by sample . __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'slice_np' , thin = 10 , warmup_steps = 10 , num_chains = 1 , init_strategy = 'resample' , init_strategy_parameters = {}, init_strategy_num_candidates = None , num_workers = 1 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any Proposal distribution that is used to initialize the MCMC chain. required theta_transform Optional[torch Transform] Transformation that will be applied during sampling. Allows to perform MCMC in unconstrained space. None method str Method used for MCMC sampling, one of slice_np , slice_np_vectorized , slice , hmc , nuts . slice_np is a custom numpy implementation of slice sampling. slice_np_vectorized is identical to slice_np , but if num_chains>1 , the chains are vectorized for slice_np_vectorized whereas they are run sequentially for slice_np . The samplers hmc , nuts or slice sample with Pyro. 'slice_np' thin int The thinning factor for the chain. 10 warmup_steps int The initial number of samples to discard. 10 num_chains int The number of chains. 
1 init_strategy str The initialisation strategy for chains; proposal will draw init locations from proposal , whereas sir will use Sequential- Importance-Resampling (SIR). SIR initially samples init_strategy_num_candidates from the proposal , evaluates all of them under the potential_fn and proposal , and then resamples the initial locations with weights proportional to exp(potential_fn - proposal.log_prob . resample is the same as sir but uses exp(potential_fn) as weights. 'resample' init_strategy_parameters Dict[str, Any] Dictionary of keyword arguments passed to the init strategy, e.g., for init_strategy=sir this could be num_candidate_samples , i.e., the number of candidates to to find init locations (internal default is 1000 ), or device . {} init_strategy_num_candidates Optional[int] Number of candidates to to find init locations in init_strategy=sir (deprecated, use init_strategy_parameters instead). None num_workers int number of cpu cores used to parallelize mcmc 1 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/mcmc_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"slice_np\" , thin : int = 10 , warmup_steps : int = 10 , num_chains : int = 1 , init_strategy : str = \"resample\" , init_strategy_parameters : Dict [ str , Any ] = {}, init_strategy_num_candidates : Optional [ int ] = None , num_workers : int = 1 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: Proposal distribution that is used to initialize the MCMC chain. theta_transform: Transformation that will be applied during sampling. Allows to perform MCMC in unconstrained space. method: Method used for MCMC sampling, one of `slice_np`, `slice_np_vectorized`, `slice`, `hmc`, `nuts`. `slice_np` is a custom numpy implementation of slice sampling. `slice_np_vectorized` is identical to `slice_np`, but if `num_chains>1`, the chains are vectorized for `slice_np_vectorized` whereas they are run sequentially for `slice_np`. The samplers `hmc`, `nuts` or `slice` sample with Pyro. thin: The thinning factor for the chain. warmup_steps: The initial number of samples to discard. num_chains: The number of chains. init_strategy: The initialisation strategy for chains; `proposal` will draw init locations from `proposal`, whereas `sir` will use Sequential- Importance-Resampling (SIR). SIR initially samples `init_strategy_num_candidates` from the `proposal`, evaluates all of them under the `potential_fn` and `proposal`, and then resamples the initial locations with weights proportional to `exp(potential_fn - proposal.log_prob`. `resample` is the same as `sir` but uses `exp(potential_fn)` as weights. init_strategy_parameters: Dictionary of keyword arguments passed to the init strategy, e.g., for `init_strategy=sir` this could be `num_candidate_samples`, i.e., the number of candidates to to find init locations (internal default is `1000`), or `device`. init_strategy_num_candidates: Number of candidates to to find init locations in `init_strategy=sir` (deprecated, use init_strategy_parameters instead). 
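A configuration sketch for the MCMCPosterior constructor documented above. The potential_fn and theta_transform are assumed to come from likelihood_estimator_based_potential as in the earlier sketches, and the particular settings are illustrative rather than recommended defaults.

from sbi.inference import MCMCPosterior

posterior = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,    # run MCMC in unconstrained space
    method="slice_np_vectorized",       # vectorized slice-sampling chains
    num_chains=20,
    thin=10,
    warmup_steps=50,
    init_strategy="resample",
    init_strategy_parameters={"num_candidate_samples": 10_000},
    num_workers=1,
)
samples = posterior.sample((2_000,), x=x_o)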
num_workers: number of cpu cores used to parallelize mcmc device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . method = method self . thin = thin self . warmup_steps = warmup_steps self . num_chains = num_chains self . init_strategy = init_strategy self . init_strategy_parameters = init_strategy_parameters self . num_workers = num_workers self . _posterior_sampler = None # Hardcode parameter name to reduce clutter kwargs. self . param_name = \"theta\" if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates self . potential_ = self . _prepare_potential ( method ) self . _purpose = ( \"It provides MCMC to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) get_arviz_inference_data ( self ) \u00b6 Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to .sample(...) . For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: Type Description inference_data Arviz InferenceData object. Source code in sbi/inference/posteriors/mcmc_posterior.py def get_arviz_inference_data ( self ) -> InferenceData : \"\"\"Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to `.sample(...)`. For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: inference_data: Arviz InferenceData object. \"\"\" assert ( self . _posterior_sampler is not None ), \"\"\"No samples have been generated, call .sample() first.\"\"\" sampler : Union [ MCMC , SliceSamplerSerial , SliceSamplerVectorized ] = self . _posterior_sampler # If Pyro sampler and samples not transformed, use arviz' from_pyro. # Exclude 'slice' kernel as it lacks the 'divergence' diagnostics key. if isinstance ( self . _posterior_sampler , ( HMC , NUTS )) and isinstance ( self . theta_transform , torch_tf . IndependentTransform ): inference_data = az . from_pyro ( sampler ) # otherwise get samples from sampler and transform to original space. else : transformed_samples = sampler . get_samples ( group_by_chain = True ) # Pyro samplers returns dicts, get values. if isinstance ( transformed_samples , Dict ): # popitem gets last items, [1] get the values as tensor. transformed_samples = transformed_samples . popitem ()[ 1 ] # Our slice samplers return numpy arrays. elif isinstance ( transformed_samples , ndarray ): transformed_samples = torch . from_numpy ( transformed_samples ) . type ( torch . float32 ) # For MultipleIndependent priors transforms first dim must be batch dim. # thus, reshape back and forth to have batch dim in front. samples_shape = transformed_samples . shape samples = self . theta_transform . 
inv ( # type: ignore transformed_samples . reshape ( - 1 , samples_shape [ - 1 ]) ) . reshape ( # type: ignore * samples_shape ) inference_data = az . convert_to_inference_data ( { f \" { self . param_name } \" : samples } ) return inference_data log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/mcmc_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"\"\"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\"\"\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 
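The get_arviz_inference_data() method described above plugs into standard ArviZ diagnostics. A brief sketch, assuming .sample(...) has already been called on this MCMCPosterior (the ArviZ calls are the usual plot_trace/summary utilities):

import arviz as az

inference_data = posterior.get_arviz_inference_data()
az.plot_trace(inference_data)       # per-chain trace plots
print(az.summary(inference_data))   # r_hat, effective sample size, etc.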
100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/mcmc_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. 
The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/mcmc_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , method = None , thin = None , warmup_steps = None , num_chains = None , init_strategy = None , init_strategy_parameters = None , init_strategy_num_candidates = None , mcmc_parameters = {}, mcmc_method = None , sample_with = None , num_workers = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) with MCMC. Check the __init__() method for a description of all arguments as well as their default values. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) mcmc_parameters Dict Dictionary that is passed only to support the API of sbi v0.17.2 or older. {} mcmc_method Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. Please use method instead. None sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/mcmc_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , method : Optional [ str ] = None , thin : Optional [ int ] = None , warmup_steps : Optional [ int ] = None , num_chains : Optional [ int ] = None , init_strategy : Optional [ str ] = None , init_strategy_parameters : Optional [ Dict [ str , Any ]] = None , init_strategy_num_candidates : Optional [ int ] = None , mcmc_parameters : Dict = {}, mcmc_method : Optional [ str ] = None , sample_with : Optional [ str ] = None , num_workers : Optional [ int ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$ with MCMC. Check the `__init__()` method for a description of all arguments as well as their default values. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. 
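The per-call arguments of .sample() override the constructor settings for a single draw, and .potential() (rather than the deprecated .log_prob()) gives unnormalized evaluations. A sketch continuing the MCMCPosterior above; the settings and the gradient example are illustrative assumptions.

# Draw with per-call overrides of the constructor settings.
samples = posterior.sample(
    (5_000,),
    x=x_o,
    method="slice_np_vectorized",
    num_chains=10,
    thin=5,
    warmup_steps=100,
    num_workers=4,
    show_progress_bars=True,
)

# Unnormalized log-posterior values for a batch of parameters.
potential_values = posterior.potential(samples, x=x_o)

# With gradient tracking, e.g. for a sensitivity analysis (assumes the
# potential is differentiable, as it is for neural-network potentials).
theta_grad = samples[:100].clone().requires_grad_(True)
posterior.potential(theta_grad, x=x_o, track_gradients=True).sum().backward()
gradients = theta_grad.grad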
mcmc_parameters: Dictionary that is passed only to support the API of `sbi` v0.17.2 or older. mcmc_method: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. Please use `method` instead. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) # Replace arguments that were not passed with their default. method = self . method if method is None else method thin = self . thin if thin is None else thin warmup_steps = self . warmup_steps if warmup_steps is None else warmup_steps num_chains = self . num_chains if num_chains is None else num_chains init_strategy = self . init_strategy if init_strategy is None else init_strategy num_workers = self . num_workers if num_workers is None else num_workers init_strategy_parameters = ( self . init_strategy_parameters if init_strategy_parameters is None else init_strategy_parameters ) if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) if mcmc_method is not None : warn ( \"You passed `mcmc_method` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Use `method` \" \"instead of `mcmc_method`.\" ) method = mcmc_method if mcmc_parameters : warn ( \"You passed `mcmc_parameters` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Instead, pass \" \"the variable to `.sample()` directly, e.g. \" \"`posterior.sample((1,), num_chains=5)`.\" ) # The following lines are only for backwards compatibility with sbi v0.17.2 or # older. m_p = mcmc_parameters # define to shorten the variable name method = _maybe_use_dict_entry ( method , \"mcmc_method\" , m_p ) thin = _maybe_use_dict_entry ( thin , \"thin\" , m_p ) warmup_steps = _maybe_use_dict_entry ( warmup_steps , \"warmup_steps\" , m_p ) num_chains = _maybe_use_dict_entry ( num_chains , \"num_chains\" , m_p ) init_strategy = _maybe_use_dict_entry ( init_strategy , \"init_strategy\" , m_p ) self . potential_ = self . _prepare_potential ( method ) # type: ignore initial_params = self . _get_initial_params ( init_strategy , # type: ignore num_chains , # type: ignore num_workers , show_progress_bars , ** init_strategy_parameters , ) num_samples = torch . Size ( sample_shape ) . numel () track_gradients = method in ( \"hmc\" , \"nuts\" ) with torch . set_grad_enabled ( track_gradients ): if method in ( \"slice_np\" , \"slice_np_vectorized\" ): transformed_samples = self . _slice_np_mcmc ( num_samples = num_samples , potential_function = self . 
potential_ , initial_params = initial_params , thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore vectorized = ( method == \"slice_np_vectorized\" ), num_workers = num_workers , show_progress_bars = show_progress_bars , ) elif method in ( \"hmc\" , \"nuts\" , \"slice\" ): transformed_samples = self . _pyro_mcmc ( num_samples = num_samples , potential_function = self . potential_ , initial_params = initial_params , mcmc_method = method , # type: ignore thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore num_chains = num_chains , show_progress_bars = show_progress_bars , ) else : raise NameError samples = self . theta_transform . inv ( transformed_samples ) return samples . reshape (( * sample_shape , - 1 )) # type: ignore set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_mcmc_method ( self , method ) \u00b6 Sets sampling method to for MCMC and returns NeuralPosterior . Parameters: Name Type Description Default method str Method to use. required Returns: Type Description NeuralPosterior NeuralPosterior for chainable calls. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_mcmc_method ( self , method : str ) -> \"NeuralPosterior\" : \"\"\"Sets sampling method to for MCMC and returns `NeuralPosterior`. Args: method: Method to use. Returns: `NeuralPosterior` for chainable calls. \"\"\" self . 
_mcmc_method = method return self sbi.inference.posteriors.rejection_posterior.RejectionPosterior ( NeuralPosterior ) \u00b6 Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , max_sampling_batch_size = 10000 , num_samples_to_find_max = 10000 , num_iter_to_find_max = 100 , m = 1.2 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during but only when calling .map() . None max_sampling_batch_size int The batchsize of samples being drawn from the proposal at every iteration. 10000 num_samples_to_find_max int The number of samples that are used to find the maximum of the potential_fn / proposal ratio. 10000 num_iter_to_find_max int The number of gradient ascent iterations to find the maximum of the potential_fn / proposal ratio. 100 m float Multiplier to the potential_fn / proposal ratio. 1.2 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/rejection_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , max_sampling_batch_size : int = 10_000 , num_samples_to_find_max : int = 10_000 , num_iter_to_find_max : int = 100 , m : float = 1.2 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. max_sampling_batch_size: The batchsize of samples being drawn from the proposal at every iteration. num_samples_to_find_max: The number of samples that are used to find the maximum of the `potential_fn / proposal` ratio. num_iter_to_find_max: The number of gradient ascent iterations to find the maximum of the `potential_fn / proposal` ratio. m: Multiplier to the `potential_fn / proposal` ratio. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . max_sampling_batch_size = max_sampling_batch_size self . num_samples_to_find_max = num_samples_to_find_max self . num_iter_to_find_max = num_iter_to_find_max self . m = m self . 
_purpose = ( \"It provides rejection sampling to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/rejection_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. 
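A construction sketch for RejectionPosterior, here fed by a ratio-based potential from SNRE. The ratio_estimator_based_potential helper and the training variables are assumptions carried over from the earlier sketches.

from sbi.inference import SNRE, RejectionPosterior
from sbi.inference import ratio_estimator_based_potential

inference = SNRE(prior=prior)
ratio_estimator = inference.append_simulations(theta, x).train()
potential_fn, theta_transform = ratio_estimator_based_potential(
    ratio_estimator, prior, x_o
)

posterior = RejectionPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,
    max_sampling_batch_size=10_000,
    m=1.2,                     # multiplier on the potential/proposal ratio
)
samples = posterior.sample((1_000,), x=x_o)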
Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/rejection_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . 
required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/rejection_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = None , num_samples_to_find_max = None , num_iter_to_find_max = None , m = None , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior \\(p(\\theta|x)\\) via rejection sampling. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Samples from posterior. Source code in sbi/inference/posteriors/rejection_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : Optional [ int ] = None , num_samples_to_find_max : Optional [ int ] = None , num_iter_to_find_max : Optional [ int ] = None , m : Optional [ float ] = None , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ): r \"\"\"Return samples from posterior $p(\\theta|x)$ via rejection sampling. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () self . potential_fn . set_x ( self . _x_else_default_x ( x )) potential = partial ( self . potential_fn , track_gradients = True ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) # Replace arguments that were not passed with their default. max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) num_samples_to_find_max = ( self . num_samples_to_find_max if num_samples_to_find_max is None else num_samples_to_find_max ) num_iter_to_find_max = ( self . 
num_iter_to_find_max if num_iter_to_find_max is None else num_iter_to_find_max ) m = self . m if m is None else m samples , _ = rejection_sample ( potential , proposal = self . proposal , num_samples = num_samples , show_progress_bars = show_progress_bars , warn_acceptance = 0.01 , max_sampling_batch_size = max_sampling_batch_size , num_samples_to_find_max = num_samples_to_find_max , num_iter_to_find_max = num_iter_to_find_max , m = m , device = self . _device , ) return samples . reshape (( * sample_shape , - 1 )) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/rejection_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.vi_posterior.VIPosterior ( NeuralPosterior ) \u00b6 Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522 default_x : Optional [ torch . 
Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. q : Distribution property writable \u00b6 Returns the variational posterior. vi_method : str property writable \u00b6 Variational inference method e.g. one of [rKL, fKL, IW, alpha]. __init__ ( self , potential_fn , prior = None , q = 'maf' , theta_transform = None , vi_method = 'rKL' , device = 'cpu' , x_shape = None , parameters = [], modules = []) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required prior Optional[torch Distribution] This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If None is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. None q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, TransformedDistribution , or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro TransformedDistribution . Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. num_transfroms within the get_flow_builder method specifying the number of transformations within a normalizing flow. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). 'maf' theta_transform Optional[torch Transform] Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. None vi_method str This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are mode seeking i.e. they underestimate variance and collapse on multimodal targets ( rKL , alpha for alpha > 1) and some are mass covering i.e. they overestimate variance but typically cover all modes ( fKL , IW , alpha for alpha < 1). 'rKL' device str Training device, e.g., cpu , cuda or cuda:0 . We will ensure that all other objects are also on this device. 'cpu' x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None parameters Iterable List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a parameters attribute. [] modules Iterable List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a modules attribute. [] Source code in sbi/inference/posteriors/vi_posterior.py def __init__ ( self , potential_fn : Callable , prior : Optional [ TorchDistribution ] = None , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ] = \"maf\" , theta_transform : Optional [ TorchTransform ] = None , vi_method : str = \"rKL\" , device : str = \"cpu\" , x_shape : Optional [ torch . 
Size ] = None , parameters : Iterable = [], modules : Iterable = [], ): \"\"\" Args: potential_fn: The potential function from which to draw samples. prior: This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If `None` is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. q: Variational distribution, either string, `TransformedDistribution`, or a `VIPosterior` object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro `TransformedDistribution`. Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms` within the `get_flow_builder` method specifying the number of transformations within a normalizing flow. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). theta_transform: Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. vi_method: This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are `mode seeking` i.e. they underestimate variance and collapse on multimodal targets (`rKL`, `alpha` for alpha > 1) and some are `mass covering` i.e. they overestimate variance but typically cover all modes (`fKL`, `IW`, `alpha` for alpha < 1). device: Training device, e.g., `cpu`, `cuda` or `cuda:0`. We will ensure that all other objects are also on this device. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. parameters: List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a `parameters` attribute. modules: List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a `modules` attribute. \"\"\" super () . __init__ ( potential_fn , theta_transform , device , x_shape = x_shape ) # Especially the prior may be on another device -> move it... self . _device = device self . potential_fn . device = device move_all_tensor_to_device ( self . potential_fn , device ) # Get prior and previous builds if prior is not None : self . _prior = prior elif hasattr ( self . potential_fn , \"prior\" ) and isinstance ( self . potential_fn . prior , Distribution ): self . _prior = self . potential_fn . prior elif isinstance ( q , VIPosterior ) and isinstance ( q . _prior , Distribution ): self . _prior = q . _prior else : raise ValueError ( \"We could not find a suitable prior distribution within `potential_fn`\" \"or `q` (if a VIPosterior is given). Please explicitly specify a prior.\" ) move_all_tensor_to_device ( self . _prior , device ) self . _optimizer = None # In contrast to MCMC we want to project into constrained space. if theta_transform is None : self . link_transform = mcmc_transform ( self . _prior ) . inv else : self . link_transform = theta_transform . 
inv # This will set the variational distribution and VI method self . set_q ( q , parameters = parameters , modules = modules ) self . set_vi_method ( vi_method ) self . _purpose = ( \"It provides Variational inference to .sample() from the posterior and \" \"can evaluate the _normalized_ posterior density with .log_prob().\" ) evaluate ( self , quality_control_metric = 'psis' , N = 50000 ) \u00b6 This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type psis , which checks the quality based on the tails of importance weights (there should not be much with a large one), or prop which checks the proportionality between q and potential_fn. NOTE: In our experience prop is sensitive to distinguish good from ok whereas psis is more sensitive in distinguishing very bad from ok . Parameters: Name Type Description Default quality_control_metric str The metric of choice, we currently support [psis, prop, prop_prior]. 'psis' N int Number of samples which is used to evaluate the metric. 50000 Source code in sbi/inference/posteriors/vi_posterior.py def evaluate ( self , quality_control_metric : str = \"psis\" , N : int = int ( 5e4 )) -> None : \"\"\"This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type `psis`, which checks the quality based on the tails of importance weights (there should not be much with a large one), or `prop` which checks the proportionality between q and potential_fn. NOTE: In our experience `prop` is sensitive to distinguish ``good`` from ``ok`` whereas `psis` is more sensitive in distinguishing `very bad` from `ok`. Args: quality_control_metric: The metric of choice, we currently support [psis, prop, prop_prior]. N: Number of samples which is used to evaluate the metric. \"\"\" quality_control_fn , quality_control_msg = get_quality_metric ( quality_control_metric ) metric = round ( float ( quality_control_fn ( self , N = N )), 3 ) print ( f \"Quality Score: { metric } \" + quality_control_msg ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the variational posterior. Parameters: Name Type Description Default theta Tensor Parameters required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/vi_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the variational posterior. Args: theta: Parameters track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit using observation { x } . \\ Please train.\" ) with torch . set_grad_enabled ( track_gradients ): theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . q . 
log_prob ( theta ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 10000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 10000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/vi_posterior.py def map ( self , x : Optional [ TorchTensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , TorchTensor ] = \"proposal\" , num_init_samples : int = 10_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. 
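As the warning above suggests, the default optimizer settings often need hand-tuning. A hypothetical call adjusting them on an already-built posterior (all argument values below are illustrative, not recommendations from the reference):

```python
# Illustrative values only; tune to the problem at hand.
theta_map = posterior.map(
    num_init_samples=5_000,   # draw more candidate starting points
    num_to_optimize=200,      # refine the 200 highest-scoring candidates
    learning_rate=5e-3,       # smaller steps if the log-probability oscillates
    num_iter=2_000,
    show_progress_bars=True,
)
```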
For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" self . proposal = self . q return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/vi_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , ** kwargs ) \u00b6 Samples from the variational posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Shape of samples torch.Size([]) Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/vi_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , ** kwargs , ) -> Tensor : \"\"\"Samples from the variational posterior distribution. 
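Once trained, the variational posterior `q` is a tractable distribution, so sampling and normalized density evaluation are cheap. A short sketch, assuming `posterior` is a trained `VIPosterior` with a default `x` set:

```python
samples = posterior.sample((1000,))      # shape (1000, parameter_dim), no extra simulator cost
log_q = posterior.log_prob(samples)      # normalized log q(theta), unlike .potential()
batched = posterior.sample((10, 100))    # multidimensional sample_shape -> (10, 100, parameter_dim)
```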
Args: sample_shape: Shape of samples Returns: Samples from posterior. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit on the specified `default_x` \" f \" { x } . Please train using `posterior.train()`.\" ) samples = self . q . sample ( torch . Size ( sample_shape )) return samples . reshape (( * sample_shape , samples . shape [ - 1 ])) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/vi_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_q ( self , q , parameters = [], modules = []) \u00b6 Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function parameters or modules to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Parameters: Name Type Description Default q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a parameterized distribution object i.e. 
a torch.distributions Distribution with methods parameters returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. num_transfroms:int by using the get_flow_builder method specifying the hyperparameters. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). required parameters Iterable List of parameters associated with the distribution object. [] modules Iterable List of modules associated with the distribution object. [] Source code in sbi/inference/posteriors/vi_posterior.py def set_q ( self , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ], parameters : Iterable = [], modules : Iterable = [], ) -> None : \"\"\"Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function `parameters` or `modules` to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Args: q: Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a `parameterized` distribution object i.e. a torch.distributions Distribution with methods `parameters` returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms:int` by using the `get_flow_builder` method specifying the hyperparameters. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). parameters: List of parameters associated with the distribution object. modules: List of modules associated with the distribution object. \"\"\" self . _q_arg = q if isinstance ( q , Distribution ): q = adapt_variational_distribution ( q , self . _prior , self . link_transform , parameters = parameters , modules = modules , ) make_object_deepcopy_compatible ( q ) self_custom_q_init_cache = deepcopy ( q ) self . _q_build_fn = lambda * args , ** kwargs : self_custom_q_init_cache self . _trained_on = None elif isinstance ( q , str ) or isinstance ( q , Callable ): if isinstance ( q , str ): self . _q_build_fn = get_flow_builder ( q ) else : self . _q_build_fn = q q = self . _q_build_fn ( self . _prior . event_shape , self . link_transform , device = self . _device , ) make_object_deepcopy_compatible ( q ) self . _trained_on = None elif isinstance ( q , VIPosterior ): self . _q_build_fn = q . _q_build_fn self . _trained_on = q . _trained_on self . vi_method = q . vi_method # type: ignore self . _device = q . _device self . _prior = q . _prior self . _x = q . _x self . _q_arg = q . _q_arg make_object_deepcopy_compatible ( q . q ) q = deepcopy ( q . q ) move_all_tensor_to_device ( q , self . 
_device ) assert isinstance ( q , Distribution ), \"\"\"Something went wrong when initializing the variational distribution. Please create an issue on github https://github.com/mackelab/sbi/issues\"\"\" check_variational_distribution ( q , self . _prior ) self . _q = q set_vi_method ( self , method ) \u00b6 Sets variational inference method. Parameters: Name Type Description Default method str One of [rKL, fKL, IW, alpha]. required Returns: Type Description VIPosterior VIPosterior for chainable calls. Source code in sbi/inference/posteriors/vi_posterior.py def set_vi_method ( self , method : str ) -> \"VIPosterior\" : \"\"\"Sets variational inference method. Args: method: One of [rKL, fKL, IW, alpha]. Returns: `VIPosterior` for chainable calls. \"\"\" self . _vi_method = method self . _optimizer_builder = get_VI_method ( method ) return self train ( self , x = None , n_particles = 256 , learning_rate = 0.001 , gamma = 0.999 , max_num_iters = 2000 , min_num_iters = 10 , clip_value = 10.0 , warm_up_rounds = 100 , retrain_from_scratch = False , reset_optimizer = False , show_progress_bar = True , check_for_convergence = True , quality_control = True , quality_control_metric = 'psis' , ** kwargs ) \u00b6 This method trains the variational posterior. Parameters: Name Type Description Default x Optional[Tensor] The observation. None n_particles int Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. 256 learning_rate float Learning rate of the optimizer. 0.001 gamma float Learning rate decay per iteration. We use an exponential decay scheduler. 0.999 max_num_iters int Maximum number of iterations. 2000 min_num_iters int Minimum number of iterations. 10 clip_value float Gradient clipping value, decreasing may help if you see invalid values. 10.0 warm_up_rounds int Initialize the posterior as the prior. 100 retrain_from_scratch bool Retrain the variational distributions from scratch. False reset_optimizer bool Reset the divergence optimizer False show_progress_bar bool If any progress report should be displayed. True quality_control bool If False quality control is skipped. True quality_control_metric str Which metric to use for evaluating the quality. 'psis' kwargs Hyperparameters check corresponding DivergenceOptimizer for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some exotic user-specified q\u2019s. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See DivergenceOptimizer for details. scheduler: A PyTorch learning rate scheduler. See DivergenceOptimizer for details. alpha: Only used if vi_method= alpha . Determines the alpha divergence. K: Only used if vi_method= IW . Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) {} Returns: Type Description VIPosterior VIPosterior (can be used to chain calls). 
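Taken together, a minimal end-to-end sketch of the `VIPosterior` workflow, assuming `build_posterior` supports `sample_with="vi"` (as in recent `sbi` versions); the toy Gaussian simulator, observation, and hyperparameters are illustrative. Note that `q` must be re-trained for every new observation.

```python
import torch
from sbi.inference import SNLE
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
theta = prior.sample((2000,))
x = theta + 0.1 * torch.randn_like(theta)                 # toy stand-in simulator

inference = SNLE(prior=prior)
inference.append_simulations(theta, x).train()

posterior = inference.build_posterior(sample_with="vi")   # returns a VIPosterior
posterior.set_vi_method("fKL")                            # mass-covering divergence
x_o = torch.zeros(1, 2)                                   # assumed observation
posterior.set_default_x(x_o)
posterior.train(learning_rate=1e-3, max_num_iters=1000)   # fits q to this x_o
posterior.evaluate(quality_control_metric="psis")         # also run by train() by default
samples = posterior.sample((1000,))
```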
Source code in sbi/inference/posteriors/vi_posterior.py def train ( self , x : Optional [ TorchTensor ] = None , n_particles : int = 256 , learning_rate : float = 1e-3 , gamma : float = 0.999 , max_num_iters : int = 2000 , min_num_iters : int = 10 , clip_value : float = 10.0 , warm_up_rounds : int = 100 , retrain_from_scratch : bool = False , reset_optimizer : bool = False , show_progress_bar : bool = True , check_for_convergence : bool = True , quality_control : bool = True , quality_control_metric : str = \"psis\" , ** kwargs , ) -> \"VIPosterior\" : \"\"\"This method trains the variational posterior. Args: x: The observation. n_particles: Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. learning_rate: Learning rate of the optimizer. gamma: Learning rate decay per iteration. We use an exponential decay scheduler. max_num_iters: Maximum number of iterations. min_num_iters: Minimum number of iterations. clip_value: Gradient clipping value, decreasing may help if you see invalid values. warm_up_rounds: Initialize the posterior as the prior. retrain_from_scratch: Retrain the variational distributions from scratch. reset_optimizer: Reset the divergence optimizer show_progress_bar: If any progress report should be displayed. quality_control: If False quality control is skipped. quality_control_metric: Which metric to use for evaluating the quality. kwargs: Hyperparameters check corresponding `DivergenceOptimizer` for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some `exotic` user-specified q's. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See `DivergenceOptimizer` for details. scheduler: A PyTorch learning rate scheduler. See `DivergenceOptimizer` for details. alpha: Only used if vi_method=`alpha`. Determines the alpha divergence. K: Only used if vi_method=`IW`. Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) Returns: VIPosterior: `VIPosterior` (can be used to chain calls). \"\"\" # Update optimizer with current arguments. if self . _optimizer is not None : self . _optimizer . update ({ ** locals (), ** kwargs }) # Init q and the optimizer if necessary if retrain_from_scratch : self . q = self . _q_build_fn () # type: ignore self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) if ( reset_optimizer or self . _optimizer is None or not isinstance ( self . _optimizer , self . _optimizer_builder ) ): self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) # Check context x = atleast_2d_float32_tensor ( self . _x_else_default_x ( x )) . to ( # type: ignore self . _device ) already_trained = self . _trained_on is not None and ( x == self . _trained_on ) . all () # Optimize optimizer = self . _optimizer optimizer . to ( self . _device ) optimizer . 
reset_loss_stats () if show_progress_bar : iters = tqdm ( range ( max_num_iters )) else : iters = range ( max_num_iters ) # Warmup before training if reset_optimizer or ( not optimizer . warm_up_was_done and not already_trained ): if show_progress_bar : iters . set_description ( # type: ignore \"Warmup phase, this may take a few seconds...\" ) optimizer . warm_up ( warm_up_rounds ) for i in iters : optimizer . step ( x ) mean_loss , std_loss = optimizer . get_loss_stats () # Update progress bar if show_progress_bar : assert isinstance ( iters , tqdm ) iters . set_description ( # type: ignore f \"Loss: { np . round ( float ( mean_loss ), 2 ) } \" f \"Std: { np . round ( float ( std_loss ), 2 ) } \" ) # Check for convergence if check_for_convergence and i > min_num_iters : if optimizer . converged (): if show_progress_bar : print ( f \" \\n Converged with loss: { np . round ( float ( mean_loss ), 2 ) } \" ) break # Training finished: self . _trained_on = x # Evaluate quality if quality_control : try : self . evaluate ( quality_control_metric = quality_control_metric ) except Exception as e : print ( f \"Quality control did not work, we reset the variational \\ posterior,please check your setting. \\ \\n Following error occured { e } \" ) self . train ( learning_rate = learning_rate * 0.1 , retrain_from_scratch = True , reset_optimizer = True , ) return self Models \u00b6 sbi . utils . get_nn_models . posterior_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. 
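A hypothetical usage sketch: the builder returned by `posterior_nn` is passed to the inference class rather than called directly (toy data and hyperparameters below are illustrative):

```python
import torch
from sbi.inference import SNPE
from sbi.utils import BoxUniform
from sbi.utils.get_nn_models import posterior_nn

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
density_estimator_build_fun = posterior_nn(
    model="nsf", hidden_features=60, num_transforms=3
)
inference = SNPE(prior=prior, density_estimator=density_estimator_build_fun)

theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)      # toy stand-in simulator
inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()
```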
{} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. 
\" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn sbi . utils . get_nn_models . likelihood_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. 
num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn sbi . utils . get_nn_models . classifier_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , embedding_net_theta = Identity (), embedding_net_x = Identity (), ** kwargs ) \u00b6 Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. 
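Analogously to `posterior_nn` above, a hypothetical sketch of passing a custom classifier builder to SNRE (toy data and settings are illustrative):

```python
import torch
from sbi.inference import SNRE
from sbi.utils import BoxUniform
from sbi.utils.get_nn_models import classifier_nn

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
classifier_build_fun = classifier_nn(model="resnet", hidden_features=50)
inference = SNRE(prior=prior, classifier=classifier_build_fun)

theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)      # toy stand-in simulator
inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()        # ratio-based posterior, sampled via MCMC by default
```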
The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn Potentials \u00b6 sbi . inference . potentials . posterior_based_potential . posterior_estimator_based_potential ( posterior_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. 
x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . likelihood_based_potential . likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . ratio_based_potential . ratio_estimator_based_potential ( ratio_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . 
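A hypothetical sketch of the potential-based workflow: the returned `potential_fn` and `theta_transform` are plugged into one of the posterior classes documented above. Here `likelihood_estimator`, `prior`, and `x_o` are assumed to come from a previous SNLE training run.

```python
from sbi.inference import MCMCPosterior, likelihood_estimator_based_potential

potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)
posterior = MCMCPosterior(
    potential_fn, proposal=prior, theta_transform=theta_transform
)
samples = posterior.sample((1000,))
```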
True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform Analysis \u00b6 sbi . analysis . plot . pairplot ( samples , points = None , limits = None , subset = None , offdiag = 'hist' , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], upper = None , fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . 
Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . 
T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . marginal_plot ( samples , points = None , limits = None , subset = None , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. 
{} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . conditional_pairplot ( density , condition , limits , points = None , subset = None , resolution = 50 , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. 
be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . conditional_density . conditional_corrcoeff ( density , limits , condition , subset = None , resolution = 50 ) \u00b6 Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. 
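To see how these conditional-analysis tools are typically combined, here is a minimal sketch; it assumes a trained posterior over three parameters bounded in [-2, 2] whose default observation has already been set (`posterior` is a placeholder from an earlier SNPE run, not part of this reference):

```python
import torch
from sbi.analysis import conditional_corrcoeff, conditional_pairplot

# Condition on a single plausible parameter set, e.g. a posterior sample.
condition = posterior.sample((1,))
limits = torch.tensor([[-2.0, 2.0]] * 3)

# Conditional Pearson correlations between parameter pairs, given `condition`.
corr_mat = conditional_corrcoeff(density=posterior, limits=limits, condition=condition)

# 2D slices through the posterior density at the location given by `condition`.
fig, axes = conditional_pairplot(density=posterior, condition=condition, limits=limits)
```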
If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"API Reference"},{"location":"reference/#api-reference","text":"","title":"API Reference"},{"location":"reference/#inference","text":"","title":"Inference"},{"location":"reference/#sbi.inference.base.infer","text":"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior \\(p(\\theta|x)\\) can be sampled and evaluated for any \\(x\\) (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. 
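For instance, any PyTorch distribution already satisfies this prior requirement; a minimal sketch (the three-dimensional standard normal is an arbitrary choice):

```python
import torch
from torch.distributions import MultivariateNormal

# A 3D standard-normal prior; .sample() and .log_prob() are all that `infer` needs.
prior = MultivariateNormal(torch.zeros(3), torch.eye(3))
```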
required method str What inference method to use. Either of SNPE, SNLE or SNRE. required num_simulations int Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. required num_workers int Number of parallel workers to use for simulations. 1 Returns: Posterior over parameters conditional on observations (amortized). Source code in sbi/inference/base.py def infer ( simulator : Callable , prior : Distribution , method : str , num_simulations : int , num_workers : int = 1 , ) -> NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior","title":"infer()"},{"location":"reference/#sbi.utils.user_input_checks.prepare_for_sbi","text":"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. 
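Putting these helpers together, a minimal sketch of the flexible interface looks as follows; the toy simulator and the prior bounds are placeholders for a user's own model, not part of this reference:

```python
import torch
from sbi.inference import SNPE, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform

def simulator(theta):
    # Toy simulator: parameters plus Gaussian noise (stand-in for a real model).
    return theta + 0.1 * torch.randn_like(theta)

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Check and wrap simulator and prior, then draw (theta, x) pairs.
simulator, prior = prepare_for_sbi(simulator, prior)
theta, x = simulate_for_sbi(simulator, proposal=prior, num_simulations=1000)

# Train SNPE and build an amortized posterior.
inference = SNPE(prior=prior)
density_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(density_estimator)
```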
Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches, and its samples and log-prob evaluations are Tensors.

    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior","title":"prepare_for_sbi()"},{"location":"reference/#sbi.inference.base.simulate_for_sbi","text":"Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . 
sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x","title":"simulate_for_sbi()"},{"location":"reference/#sbi.inference.snpe.snpe_a.SNPE_A","text":"","title":"SNPE_A"},{"location":"reference/#sbi.inference.snpe.snpe_c.SNPE_C","text":"","title":"SNPE_C"},{"location":"reference/#sbi.inference.snle.snle_a.SNLE_A","text":"","title":"SNLE_A"},{"location":"reference/#sbi.inference.snre.snre_a.SNRE_A","text":"","title":"SNRE_A"},{"location":"reference/#sbi.inference.snre.snre_b.SNRE_B","text":"","title":"SNRE_B"},{"location":"reference/#sbi.inference.snre.snre_c.SNRE_C","text":"","title":"SNRE_C"},{"location":"reference/#sbi.inference.snre.bnre.BNRE","text":"","title":"BNRE"},{"location":"reference/#sbi.inference.abc.mcabc.MCABC","text":"","title":"MCABC"},{"location":"reference/#sbi.inference.abc.smcabc.SMCABC","text":"","title":"SMCABC"},{"location":"reference/#posteriors","text":"","title":"Posteriors"},{"location":"reference/#sbi.inference.posteriors.direct_posterior.DirectPosterior","text":"Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE.","title":"DirectPosterior"},{"location":"reference/#sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior","text":"Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows to estimate the posterior log-probability by estimating the normlalization constant with importance sampling. It also allows to perform importance sampling (with .sample() ) and to draw approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() )","title":"ImportanceSamplingPosterior"},{"location":"reference/#sbi.inference.posteriors.mcmc_posterior.MCMCPosterior","text":"Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows to sample from the posterior with MCMC.","title":"MCMCPosterior"},{"location":"reference/#sbi.inference.posteriors.rejection_posterior.RejectionPosterior","text":"Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling.","title":"RejectionPosterior"},{"location":"reference/#sbi.inference.posteriors.vi_posterior.VIPosterior","text":"Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . 
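As a rough sketch of how such posterior classes are assembled by hand from the potentials documented below, assuming a likelihood estimator trained with SNLE, a `prior`, and an observation `x_o` already exist (all three names are placeholders):

```python
from sbi.inference import MCMCPosterior, likelihood_estimator_based_potential

# Wrap the trained network into the potential log(p(x_o|theta) p(theta)).
potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)

# Sample the potential with MCMC; RejectionPosterior or VIPosterior work analogously.
posterior = MCMCPosterior(potential_fn, proposal=prior, theta_transform=theta_transform)
samples = posterior.sample((1000,))
```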
After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522","title":"VIPosterior"},{"location":"reference/#models","text":"","title":"Models"},{"location":"reference/#sbi.utils.get_nn_models.posterior_nn","text":"Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. \" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn","title":"posterior_nn()"},{"location":"reference/#sbi.utils.get_nn_models.likelihood_nn","text":"Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. 
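To make the role of these builder functions concrete, here is a short sketch of passing customized density estimators to the flexible interface; `prior` is assumed to be defined elsewhere and the hyperparameters are arbitrary:

```python
from sbi.inference import SNLE, SNPE
from sbi.utils.get_nn_models import likelihood_nn, posterior_nn

# SNPE with a neural spline flow approximating the posterior.
posterior_build_fn = posterior_nn(model="nsf", hidden_features=60, num_transforms=3)
inference = SNPE(prior=prior, density_estimator=posterior_build_fn)

# SNLE with a masked autoregressive flow approximating the likelihood.
likelihood_build_fn = likelihood_nn(model="maf", hidden_features=60)
inference = SNLE(prior=prior, density_estimator=likelihood_build_fn)
```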
\"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn","title":"likelihood_nn()"},{"location":"reference/#sbi.utils.get_nn_models.classifier_nn","text":"Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. 
- `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn","title":"classifier_nn()"},{"location":"reference/#potentials","text":"","title":"Potentials"},{"location":"reference/#sbi.inference.potentials.posterior_based_potential.posterior_estimator_based_potential","text":"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . 
device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"posterior_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.likelihood_based_potential.likelihood_estimator_based_potential","text":"Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"likelihood_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.ratio_based_potential.ratio_estimator_based_potential","text":"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . 
Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"ratio_estimator_based_potential()"},{"location":"reference/#analysis","text":"","title":"Analysis"},{"location":"reference/#sbi.analysis.plot.pairplot","text":"Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . 
Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . 
ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"pairplot()"},{"location":"reference/#sbi.analysis.plot.marginal_plot","text":"Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. 
Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes )","title":"marginal_plot()"},{"location":"reference/#sbi.analysis.plot.conditional_pairplot","text":"Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. 
None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . 
T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"conditional_pairplot()"},{"location":"reference/#sbi.analysis.conditional_density.conditional_corrcoeff","text":"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"conditional_corrcoeff()"},{"location":"examples/00_HH_simulator/","text":"Inference on Hodgkin-Huxley model: tutorial \u00b6 In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False Different required components \u00b6 Before running inference, let us define the different required components: observed data prior over model parameters simulator 1. Observed data \u00b6 Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial. 2. Simulator \u00b6 We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 
2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . 
FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to. 3. Prior over model parameters \u00b6 Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) ) Inference \u00b6 Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue . Coming back to the observed data \u00b6 As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . 
gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) Analysis of the posterior given the observed data \u00b6 After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case. References \u00b6 A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. 
Biological Cybernetics, 99(4-5), 2008.","title":"Hodgkin-Huxley example"},{"location":"examples/00_HH_simulator/#inference-on-hodgkin-huxley-model-tutorial","text":"In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False","title":"Inference on Hodgkin-Huxley model: tutorial"},{"location":"examples/00_HH_simulator/#different-required-components","text":"Before running inference, let us define the different required components: observed data prior over model parameters simulator","title":"Different required components"},{"location":"examples/00_HH_simulator/#1-observed-data","text":"Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial.","title":"1. Observed data"},{"location":"examples/00_HH_simulator/#2-simulator","text":"We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . 
Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. 
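For instance, a minimal, hypothetical sketch of such a wrapper (the `toy_external_simulator` below is a stand-in, not part of this tutorial): any Python callable that maps a parameter vector to a tensor of summary statistics can be used.

```python
import numpy as np
import torch

def toy_external_simulator(g_na, g_k):
    # Stand-in for an arbitrary simulator; in practice you could launch an
    # external tool here and read its output back into Python.
    return np.array([g_na + g_k, g_na * g_k, abs(g_na - g_k)])

def external_simulation_wrapper(params):
    # The callable sbi sees: parameters in, summary statistics out.
    params = np.asarray(params).reshape(-1)
    stats = toy_external_simulator(params[0], params[1])
    return torch.as_tensor(stats, dtype=torch.float32)
```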
Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to.","title":"2. Simulator"},{"location":"examples/00_HH_simulator/#3-prior-over-model-parameters","text":"Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"3. Prior over model parameters"},{"location":"examples/00_HH_simulator/#inference","text":"Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue .","title":"Inference"},{"location":"examples/00_HH_simulator/#coming-back-to-the-observed-data","text":"As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" ))","title":"Coming back to the observed data"},{"location":"examples/00_HH_simulator/#analysis-of-the-posterior-given-the-observed-data","text":"After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. 
To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case.","title":"Analysis of the posterior given the observed data"},{"location":"examples/00_HH_simulator/#references","text":"A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. Biological Cybernetics, 99(4-5), 2008.","title":"References"},{"location":"examples/01_decision_making_model/","text":"SBI for decision-making models \u00b6 In a previous tutorial , we showed how to use SBI with trial-based iid data. Such scenarios can arise, for example, in models of perceptual decision making. In addition to trial-based iid data points, these models often come with mixed data types and varying experimental conditions. Here, we show how sbi can be used to perform inference in such models with the MNLE method. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. 
However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . MNLE was recently added to sbi (see this PR and also issue ) and follows the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. 
class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 ) Alternative: parallelize yourself \u00b6 You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface : Some more background \u00b6 sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. 
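As a sketch of the "parallelize yourself" alternative mentioned above (the toy simulator and parameter ranges below are placeholders, not part of the FAQ), you can generate the training data outside of sbi and then append it via the flexible interface:

```python
import torch
from sbi.inference import SNPE
from sbi.utils.torchutils import BoxUniform

prior = BoxUniform(low=torch.zeros(2), high=torch.ones(2))
theta = prior.sample((1000,))

def toy_simulator(single_theta):
    # Stand-in for your own (expensive) simulator.
    return single_theta + 0.1 * torch.randn(2)

# Run the simulations yourself; replace this loop with your preferred
# parallelization framework (multiprocessing, joblib, dask, a cluster, ...).
x = torch.stack([toy_simulator(t) for t in theta])

# Pass the pre-simulated data to sbi via the flexible interface.
inference = SNPE(prior=prior)
inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()
```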
Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#when-using-multiple-workers-i-get-a-pickling-error-can-i-still-use-multiprocessing","text":"Yes, but you will have to make a few adjustments to your code. Some background: when using num_workers > 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 )","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#alternative-parallelize-yourself","text":"You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface :","title":"Alternative: parallelize yourself"},{"location":"faq/question_03/#some-more-background","text":"sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"Some more background"},{"location":"faq/question_04/","text":"Can I use the GPU for training the density estimator? \u00b6 TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) . Performance \u00b6 Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. 
A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#can-i-use-the-gpu-for-training-the-density-estimator","text":"TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#performance","text":"Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Performance"},{"location":"faq/question_05/","text":"How should I save and load objects in sbi ? \u00b6 NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. 
.train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_05/#how-should-i-save-and-load-objects-in-sbi","text":"NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. .train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_06/","text":"Can I stop neural network training and resume it later? \u00b6 Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_06/#can-i-stop-neural-network-training-and-resume-it-later","text":"Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . 
dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_07/","text":"Can I use a custom prior with sbi? \u00b6 sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. 
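As a quick sanity check of the shape conventions described above, the wrapped prior can be sampled and evaluated like any torch Distribution (a sketch; it assumes the CustomUniformPrior class and its imports from the snippet above are in scope):

```python
import torch
from sbi.utils import process_prior

# Wrap the custom prior (defined above) and check the expected shapes.
custom_prior = CustomUniformPrior(torch.zeros(2), torch.ones(2))
prior, *_ = process_prior(custom_prior)

theta = prior.sample((10,))        # expected shape: (10, 2)
log_probs = prior.log_prob(theta)  # expected shape: (10,)
print(theta.shape, log_probs.shape)
```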
If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"faq/question_07/#can-i-use-a-custom-prior-with-sbi","text":"sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). 
In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"tutorial/00_getting_started/","text":"Getting started with sbi \u00b6 Note, you can find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/tutorials/00_getting_started.ipynb in the sbi repository. import torch from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer Running the inference procedure \u00b6 sbi provides a simple interface to run state-of-the-art algorithms for simulation-based inference. For inference, you need to provide two ingredients: 1) a prior distribution that allows to sample parameter sets. 2) a simulator that takes parameter sets and produces simulation outputs. For example, we can have a 3-dimensional parameter space with a uniform prior between [-1,1] and a simple simulator that for the sake of example adds 1.0 and some Gaussian noise to the parameter set: num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def simulator ( parameter_set ): return 1.0 + parameter_set + torch . randn ( parameter_set . shape ) * 0.1 sbi can then run inference: posterior = infer ( simulator , prior , method = \"SNPE\" , num_simulations = 1000 ) Running 1000 simulations.: 0%| | 0/1000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The simulator model \u00b6 The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)') Defining an embedding_net \u00b6 An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . __init__ () # 2D convolutional layer self . conv1 = nn . 
Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet () The inference procedure \u00b6 With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00","title":"Learning summary statistics with a neural net"},{"location":"tutorial/05_embedding_net/#the-simulator-model","text":"The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)')","title":"The simulator model"},{"location":"tutorial/05_embedding_net/#defining-an-embedding_net","text":"An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . 
__init__ () # 2D convolutional layer self . conv1 = nn . Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet ()","title":"Defining an embedding_net"},{"location":"tutorial/05_embedding_net/#the-inference-procedure","text":"With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00] 1.3 Summary statistics \u00b6 We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) i.e. Euclidean distance from the observation function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change in small amounts every time you rerun it due to the noise, except if you set the seed. 1.4 Simulating data \u00b6 Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . 
plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there is no apparent reason why one method should be better than the other. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones. 1.5 Inference \u00b6 1.5.1 Using the MSE \u00b6 Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve built the posterior, we can see how likely it finds certain parameters given that we tell it that we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi to do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better. 1.5.2 Using 3 coordinates as summary statistics \u00b6 x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself to see how this improves further with more training samples, or try to find out how many simulations you actually need to still obtain a satisfying-looking posterior and posterior-predictive simulations close to the observation. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.
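Before moving on, a quick visual comparison helps: simulate traces from the posterior samples and overlay them on the observation (a sketch; it reuses create_t_x , theta_o and the posterior samples theta_p obtained above):

```python
import matplotlib.pyplot as plt

# Posterior-predictive traces for 100 posterior samples vs. the observation.
t, x_truth = create_t_x(theta_o)
t, x_pred = create_t_x(theta_p[:100].numpy())
plt.plot(t, x_pred, "grey", zorder=0)
plt.plot(t, x_truth, "k", zorder=1, label="truth")
plt.legend()
plt.show()
```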
1.6 Prior simulations\u2019 summary statistics vs observed summary statistics \u00b6 Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance. 1.7 Explicit recommendations \u00b6 We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#crafting-summary-statistics","text":"Many simulators produce outputs that are high-dimesional. For example, a simulator might generate a time series or an image. In a previous tutorial , we discussed how a neural networks can be used to learn summary statistics from such data. In this notebook, we will instead focus on hand-crafting summary statistics. We demonstrate that the choice of summary statistics can be crucial for the performance of the inference algorithm. import numpy as np import torch import matplotlib.pyplot as plt import matplotlib as mpl # sbi import sbi.utils as utils from sbi.inference.base import infer from sbi.inference import SNPE , prepare_for_sbi , simulate_for_sbi from sbi.utils.get_nn_models import posterior_nn from sbi.analysis import pairplot # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False This notebook is not intended to provide a one-fits-all approach. 
In fact it argues against this: it argues for the user to carefully construct their summary statistics to (i) further help the user understand his observed data, (ii) help them understand exactly what they want the model to recover from the observation and (iii) help the inference framework itself.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#example-1-the-quadratic-function","text":"Assume we have a simulator that is given by a quadratic function: \\(x(t) = a\\cdot t^2 + b\\cdot t + c + \\epsilon\\) , where \\(\\epsilon\\) is Gaussian observation noise and \\(\\theta = \\{a, b, c\\}\\) are the parameters. Given an observed quadratic function \\(x_o\\) , we would like to recover the posterior over parameters \\(a_o\\) , \\(b_o\\) and \\(c_o\\) .","title":"Example 1: The quadratic function"},{"location":"tutorial/10_crafting_summary_statistics/#11-prior-over-parameters","text":"First we define a prior distribution over parameters \\(a\\) , \\(b\\) and \\(c\\) . Here, we use a uniform prior for \\(a\\) , \\(b\\) and \\(c\\) to go from \\(-1\\) to \\(1\\) . prior_min = [ - 1 , - 1 , - 1 ] prior_max = [ 1 , 1 , 1 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"1.1 Prior over parameters"},{"location":"tutorial/10_crafting_summary_statistics/#12-simulator","text":"Defining some helper functions first: def create_t_x ( theta , seed = None ): \"\"\"Return an t, x array for plotting based on params\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () t = np . linspace ( - 1 , 1 , 200 ) ts = np . repeat ( t [:, np . newaxis ], theta . shape [ 0 ], axis = 1 ) x = ( theta [:, 0 ] * ts ** 2 + theta [:, 1 ] * ts + theta [:, 2 ] + 0.01 * rng . randn ( ts . shape [ 0 ], theta . shape [ 0 ]) ) return t , x def eval ( theta , t , seed = None ): \"\"\"Evaluate the quadratic function at `t`\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () return theta [:, 0 ] * t ** 2 + theta [:, 1 ] * t + theta [:, 2 ] + 0.01 * rng . randn ( 1 ) In this example, we generate the observation \\(x_o\\) from parameters \\(\\theta_o=(a_o, b_o, c_o)=(0.3, -0.2, -0.1)\\) . The observation as follows. theta_o = np . array ([ 0.3 , - 0.2 , - 0.1 ]) t , x = create_t_x ( theta_o ) plt . plot ( t , x , \"k\" ) []","title":"1.2 Simulator"},{"location":"tutorial/10_crafting_summary_statistics/#13-summary-statistics","text":"We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . 
array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) i.e. Euclidean distance from the observation function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change in small amounts every time you rerun it due to the noise, except if you set the seed.","title":"1.3 Summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#14-simulating-data","text":"Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there might be an appararent reason why one method would be better than another. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones.","title":"1.4 Simulating data"},{"location":"tutorial/10_crafting_summary_statistics/#15-inference","text":"","title":"1.5 Inference"},{"location":"tutorial/10_crafting_summary_statistics/#151-using-the-mse","text":"Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve build the posterior as such, we can see how likely it finds certain parameters given that we tell it that we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better.","title":"1.5.1 Using the MSE"},{"location":"tutorial/10_crafting_summary_statistics/#152-using-3-coordinates-as-summary-statistics","text":"x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. 
The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself how this improves further with more training samples or you could try to see how many you\u2019d exactly need to keep having a satisfyingly looking posterior and high posterior sample simulations. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.","title":"1.5.2 Using 3 coordinates as summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#16-prior-simulations-summary-statistics-vs-observed-summary-statistics","text":"Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance.","title":"1.6 Prior simulations' summary statistics vs observed summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#17-explicit-recommendations","text":"We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. 
Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"1.7 Explicit recommendations"},{"location":"tutorial/11_sampler_interface/","text":"The sampler interface \u00b6 Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) When using the flexible interface, the sampler as well as its attributes can be set with sample_with=\"mcmc\" , mcmc_method=\"slice_np\" , and mcmc_parameters={} . However, for full flexibility in customizing the sampler, we recommend using the sampler interface . This interface is described here. Further details can be found here . Main syntax for SNLE \u00b6 import torch from sbi.inference import SNLE from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) Neural network successfully converged after 52 epochs. Further explanation \u00b6 The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function of the parameter \\(f(\\theta)\\) . The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform . The parameter_transform is a fixed transform that is can be applied to parameter theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). 
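As a sketch of how much of the sampler can be customized through this interface, the potential function and transform can be handed to an MCMC sampler with non-default settings; the keyword arguments method , num_chains , warmup_steps and thin are taken to be MCMCPosterior options, so verify them against the MCMCPosterior documentation of your installed sbi version:

```python
from sbi.inference import MCMCPosterior

# Sketch: an MCMC sampler on top of potential_fn with custom settings.
posterior = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=parameter_transform,
    method="slice_np_vectorized",  # vectorized slice sampling
    num_chains=20,
    warmup_steps=100,
    thin=5,
)
samples = posterior.sample((1000,), x=x_o)  # x_o as used to build potential_fn
```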
Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior ) Main syntax for SNPE \u00b6 SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Sampler interface"},{"location":"tutorial/11_sampler_interface/#the-sampler-interface","text":"Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) When using the flexible interface, the sampler as well as its attributes can be set with sample_with=\"mcmc\" , mcmc_method=\"slice_np\" , and mcmc_parameters={} . However, for full flexibility in customizing the sampler, we recommend using the sampler interface . This interface is described here. Further details can be found here .","title":"The sampler interface"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snle","text":"import torch from sbi.inference import SNLE from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) Neural network successfully converged after 52 epochs.","title":"Main syntax for SNLE"},{"location":"tutorial/11_sampler_interface/#further-explanation","text":"The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . 
append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function \\(f(\\theta)\\) of the parameters. The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform , i.e., a fixed transform that can be applied to the parameters theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior )","title":"Further explanation"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snpe","text":"SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Main syntax for SNPE"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/","text":"Posterior Predictive Checks (PPC) in SBI \u00b6 A common safety check performed as part of inference is the Posterior Predictive Check (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that, if the inference is correct, the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d to the observed data \\(x_o\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless, a PPC is a good starting point for diagnosing inference and can provide an intuition about any bias introduced by the inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ?
Main syntax \u00b6 from sbi.analysis import pairplot # A PPC is performed after we trained or neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o ) Performing a PPC over a toy example \u00b6 Below we provide an example Posterior Predictive Check (PPC) over some toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the poterior samples \\(\\theta_{\\text{posterior}}\\) . Note that the simulation part is not in the sbi scope, so any simulator -including a non-Python one- can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against the \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . 
tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior* predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. **The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) * . If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Posterior predictive checks"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#posterior-predictive-checks-ppc-in-sbi","text":"A common safety check performed as part of inference are Posterior Predictive Checks (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that -if the inference is correct- the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d the oberved data \\(x_0\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless a PPC is a good start for an inference diagnosis and can provide with an intuition about any bias introduced in inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ?","title":"Posterior Predictive Checks (PPC) in SBI"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#main-syntax","text":"from sbi.analysis import pairplot # A PPC is performed after we trained or neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o )","title":"Main syntax"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#performing-a-ppc-over-a-toy-example","text":"Below we provide an example Posterior Predictive Check (PPC) over some toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the poterior samples \\(\\theta_{\\text{posterior}}\\) . 
Note that the simulation part is not in the sbi scope, so any simulator -including a non-Python one- can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against the \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior* predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. **The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) * . If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Performing a PPC over a toy example"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/","text":"Simulation-based Calibration in SBI \u00b6 After a density estimator has been trained with simulated data to obtain a posterior, the estimator should be made subject to several diagnostic tests, before being used for inference given the actual observed data. Posterior Predictive Checks (see tutorial 12) provide one way to \u201ccritique\u201d a trained estimator via its predictive performance. Another important approach to such diagnostics is simulation-based calibration as reported by Talts et al, 2018 . Simulation-based calibration (SBC) provides a (qualitative) view and a quantitive measure to check, whether the uncertainties of the posterior are balanced, i.e., neither over-confident nor under-confident. As such, SBC can be viewed as a necessary condition (but not sufficient) for a valid inference algorithm: If SBC checks fail, this tells you that your inference is invalid. If SBC checks pass, this is no guarantee that the posterior estimation is working. 
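The workflow summarized in the next section boils down to a few calls to sbi's helpers. Here is a compact sketch; it assumes a prior, a simulator, and an already trained posterior as in this tutorial, and the variable names (e.g. num_sbc_runs) are illustrative.
from sbi.analysis import run_sbc, sbc_rank_plot

num_sbc_runs = 200                # number of (theta_o, x_o) pairs used for SBC
num_posterior_samples = 1_000

thetas = prior.sample((num_sbc_runs,))   # ground-truth parameters drawn from the prior
xs = simulator(thetas)                   # corresponding observations simulated from them

# For each x, draw posterior samples and rank the ground-truth theta among them.
ranks, dap_samples = run_sbc(thetas, xs, posterior, num_posterior_samples=num_posterior_samples)

# Uniformly distributed ranks indicate well-calibrated posterior uncertainties.
_ = sbc_rank_plot(
    ranks=ranks,
    num_posterior_samples=num_posterior_samples,
    plot_type=\"hist\",
    num_bins=None,
)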
In a nutshell \u00b6 To run SBC, we sample theta_o_i values from the prior of the problem at hand we simulate \u201cobservations\u201d from these parameters: x_o_i = simulator(theta_o_i) we perform inference given each observation x_o_i . This produces a separate posterior \\(p_i(\\theta | x_{o,i})\\) for each of x_o_i . The key step for SBC is to generate a set of posterior samples \\(\\{\\theta\\}_i\\) from each posterior (let\u2019s call this theta_i_s , referring to s samples from posterior \\(p_i(\\theta | x_{o,i})\\) ), and to rank the corresponding theta_o_i under this set of samples. A rank is computed by counting how many samples theta_i_s fall below their corresponding theta_o_i (see section 4.1 in Talts et al.). These ranks are then used to perform the SBC check. Key ideas behind SBC \u00b6 The core idea behind SBC is two fold: SBC ranks of ground truth parameters under the inferred posterior samples follow a uniform distribution. (If the SBC ranks are not uniformly distributed, the posterior is not well calibrated.) samples from the data averaged posterior (ensemble of randomly chosen posterior samples given multiple distinct observations x_o ) are distributed according to the prior What can SBC diagnose? \u00b6 SBC can inform us whether we are not wrong. However, it cannot tell us whether we are right, i.e., SBC checks a necessary condition. For example, imagine you run SBC using the prior as a posterior. The ranks would be perfectly uniform. But the inference would be wrong. The Posterior Predictive Checks (see tutorial 12) can be seen as the complementary sufficient check for the posterior (only as a methaphor, no theoretical guarantees here). Using the prior as a posterior and then doing predictive checks would clearly show that inference failed. To summarize SBC can: tell us whether the SBI method applied to the problem at hand produces posteriors that have well-calibrated uncertainties, and if not, what kind of systematic bias it has: negative or positive bias (shift in the mean of the predictions) or over- or underdispersion (too large or too small variance) A healthy posterior \u00b6 Let\u2019s take the gaussian linear simulator from the previous tutorials and run inference with NPE on it. Note: SBC requires running inference several times. Using SBC with amortized methods like NPE is hence a justified endavour: repeated inference is cheap and SBC can be performed with little runtime penalty. This does not hold for sequential methods or anything relying on MCMC or VI (here, parallelization is your friend, num_workers>1 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False ) An ideal case \u00b6 To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . 
default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:001 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False )","title":"A healthy posterior"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/#an-ideal-case","text":"To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. 
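To make the multiply-likelihoods / add-log-likelihoods bookkeeping explicit, here is a small pure-PyTorch illustration with a tractable Gaussian likelihood (a sketch of the principle only, not how the neural likelihood is implemented internally; all values are illustrative).
import torch
from torch.distributions import Normal

theta_candidate = torch.tensor([1.0, 0.5])            # current MCMC parameter candidate
x_iid = Normal(theta_candidate, 0.2).sample((10,))    # 10 IID trials, shape (10, 2)

# Per-trial log-likelihoods log p(x_i | theta), one value per trial.
per_trial_log_liks = Normal(theta_candidate, 0.2).log_prob(x_iid).sum(dim=-1)

# The joint log-likelihood of all IID trials is simply their sum.
joint_log_lik = per_trial_log_liks.sum()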
Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . 
repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-nle","text":"(S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. 
Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . 
sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. 
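As a minimal illustration of this idea, independent of the PermutationInvariantEmbedding class used below, one can embed each trial with a small network and then average over the trial axis; the class name ToyPermutationInvariantNet and the layer sizes are illustrative assumptions.
import torch
import torch.nn as nn

class ToyPermutationInvariantNet(nn.Module):
    # Embeds each trial separately, then averages over the trial axis.
    def __init__(self, x_dim: int = 2, latent_dim: int = 10):
        super().__init__()
        self.trial_net = nn.Sequential(
            nn.Linear(x_dim, 40), nn.ReLU(), nn.Linear(40, latent_dim)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x has shape (batch, num_trials, x_dim); the mean over dim=1 makes the
        # output invariant to the order of the trials.
        return self.trial_net(x).mean(dim=1)

net = ToyPermutationInvariantNet()
x = torch.randn(5, 20, 2)  # 5 parameter settings, 20 trials each
assert torch.allclose(net(x), net(x[:, torch.randperm(20)]), atol=1e-6)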
As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-nle","text":"(S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. 
To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). 
Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2 Diagnostic plots \u00b6 az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" ); Posterior density plots \u00b6 az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Density plots and MCMC diagnostics with ArviZ"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#mcmc-diagnostics-with-arviz","text":"This tutorial shows how to evaluate the quality of MCMC samples generated via sbi using the arviz package. Outline: 1) Train MNLE to approximate the likelihood underlying the simulator 2) Run MCMC using pyro MCMC samplers via sbi interface 3) Use arviz to visualize the posterior, predictive distributions and MCMC diagnostics. import arviz as az import torch from sbi.inference import MNLE , likelihood_estimator_based_potential from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Gamma from sbi.utils import MultipleIndependent from sbi.inference import MCMCPosterior # Seeding torch . manual_seed ( 1 ); # Toy simulator for mixed data def mixed_simulator ( theta ): beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = 2 * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # Define independent priors for each dimension. prior = MultipleIndependent ( [ Gamma ( torch . 
tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , )","title":"MCMC diagnostics with Arviz"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#train-mnle-to-approximate-the-likelihood","text":"For this tutorial, we will use a simple simulator with two parameters. For details see the example on the decision making model . Here, we pass mcmc_method=\"nuts\" in order to use the underlying pyro No-U-turn sampler , but it would work as well with other samplers (e.g. \u201cslice_np_vectorized\u201d, \u201chmc\u201d). Additionally, when calling posterior.sample(...) we pass return_arviz=True so that the Arviz InferenceData object is returned. This object gives us access to the wealth of MCMC diagnostics tool provided by arviz . # Generate training data and train MNLE. num_simulations = 10000 theta = prior . sample (( num_simulations ,)) x = mixed_simulator ( theta ) trainer = MNLE ( prior ) likelihood_estimator = trainer . append_simulations ( theta , x ) . train () /Users/janbolts/qode/sbi/sbi/neural_nets/mnle.py:60: UserWarning: The mixed neural likelihood estimator assumes that x contains continuous data in the first n-1 columns (e.g., reaction times) and categorical data in the last column (e.g., corresponding choices). If this is not the case for the passed `x` do not use this function. warnings.warn( Neural network successfully converged after 65 epochs.","title":"Train MNLE to approximate the likelihood"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#run-pyro-nuts-mcmc-and-obtain-arviz-inferencedata-object","text":"# Simulate \"observed\" data x_o torch . manual_seed ( 42 ) num_trials = 100 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) # Set MCMC parameters and run Pyro NUTS. mcmc_parameters = dict ( num_chains = 4 , thin = 5 , warmup_steps = 50 , init_strategy = \"proposal\" , method = \"nuts\" , ) num_samples = 1000 # get the potential function and parameter transform for constructing the posterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) mnle_posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , ** mcmc_parameters ) mnle_samples = mnle_posterior . sample ( ( num_samples ,), x = x_o , show_progress_bars = False ) # get arviz InferenceData object from posterior inference_data = mnle_posterior . get_arviz_inference_data () /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:280: UserWarning: An x with a batch size of 100 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn(","title":"Run Pyro NUTS MCMC and obtain arviz InferenceData object"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#generate-arviz-plots","text":"The resulting InferenceData object can be passed to most arviz plotting functions, and there are plenty see here for an overview. To get a better understanding of the InferenceData object see here . Below and overview of common MCMC diagnostics plot, see the corresponding arviz documentation for interpretation of the plots. We will a full use-case using the SBI-MCMC-arviz workflow soon. print ( inference_data . 
posterior ) Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2","title":"Generate arviz plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#diagnostic-plots","text":"az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" );","title":"Diagnostic plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#posterior-density-plots","text":"az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Posterior density plots"},{"location":"tutorial/16_implemented_methods/","text":"API of implemented methods \u00b6 This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities Posterior estimation (SNPE) \u00b6 Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" ) Likelihood estimation (SNLE) \u00b6 Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) Likelihood-ratio estimation (SNRE) \u00b6 Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) Utilities \u00b6 Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Implemented algorithms"},{"location":"tutorial/16_implemented_methods/#api-of-implemented-methods","text":"This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities","title":"API of implemented methods"},{"location":"tutorial/16_implemented_methods/#posterior-estimation-snpe","text":"Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" )","title":"Posterior estimation (SNPE)"},{"location":"tutorial/16_implemented_methods/#likelihood-estimation-snle","text":"Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood estimation (SNLE)"},{"location":"tutorial/16_implemented_methods/#likelihood-ratio-estimation-snre","text":"Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. 
gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood-ratio estimation (SNRE)"},{"location":"tutorial/16_implemented_methods/#utilities","text":"Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Utilities"},{"location":"tutorial/17_SBI_for_models_of_decision_making/","text":"SBI with mixed data, iid data, and experimental conditions \u00b6 For a general tutorial on using SBI with trial-based iid data, see tutorial 14 . Here, we cover the use-case often occurring in models of decision-making: trial-based data with mixed data types and varying experimental conditions. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . 
MNLE was recently added to sbi (see this PR and also issue ) and follow the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . 
x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 1 . For SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to directly sample from the posterior without any corrections during training or without expensive MCMC for sampling. This is possible by learning the posterior with variational inference techniques. For this, an additional network (one for the likelihood or likelihood-to-evidence-ratio) must be trained first. Main syntax \u00b6 inference = SNLE ( prior ) for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) # In `SNLE` and `SNRE`, you should not pass the `proposal` to ` # .append_simulations()`. likelihood_estimator = inference . append_simulations ( theta , x , ) . train () # Obtain potential (learned likelihood * prior) and theta transformation. potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) # Build posterior via variational inference. posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () proposal = posterior Linear Gaussian example \u00b6 Below, we give a full example of inferring the posterior distribution with SNVI over multiple rounds. For this, we take the same example as in the previous tutorial. import torch from sbi.inference import ( likelihood_estimator_based_potential , SNLE , prepare_for_sbi , simulate_for_sbi , VIPosterior , ) from sbi import utils as utils from sbi import analysis as analysis _ = torch . manual_seed ( 0 ) num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def linear_gaussian ( theta ): return theta + 1.0 + torch . randn_like ( theta ) * 0.1 simulator , prior = prepare_for_sbi ( linear_gaussian , prior ) Here, we decide to learn the likelihood directly, but learning the likelihood-to-evidence ratio would also be a viable option. 
inference = SNLE ( prior = prior ) Now we can run inference, where we first learn the likelihood, which is then in turn used to learn a posterior through variational inference. num_rounds = 2 x_o = torch . zeros ( 3 ,) posteriors = [] proposal = prior for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) likelihood_estimator = inference . append_simulations ( theta , x , ) . train () potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () posteriors . append ( posterior ) proposal = posterior Running 500 simulations.: 0%| | 0/500 [00:00 1 . For SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to directly sample from the posterior without any corrections during training or without expensive MCMC for sampling. This is possible by learning the posterior with variational inference techniques. For this, an additional network (one for the likelihood or likelihood-to-evidence-ratio) must be trained first.","title":"Using Variational Inference for Building Posteriors"},{"location":"tutorial/17_vi_posteriors/#main-syntax","text":"inference = SNLE ( prior ) for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) # In `SNLE` and `SNRE`, you should not pass the `proposal` to ` # .append_simulations()`. likelihood_estimator = inference . append_simulations ( theta , x , ) . train () # Obtain potential (learned likelihood * prior) and theta transformation. potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) # Build posterior via variational inference. posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () proposal = posterior","title":"Main syntax"},{"location":"tutorial/17_vi_posteriors/#linear-gaussian-example","text":"Below, we give a full example of inferring the posterior distribution with SNVI over multiple rounds. For this, we take the same example as in the previous tutorial. import torch from sbi.inference import ( likelihood_estimator_based_potential , SNLE , prepare_for_sbi , simulate_for_sbi , VIPosterior , ) from sbi import utils as utils from sbi import analysis as analysis _ = torch . manual_seed ( 0 ) num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def linear_gaussian ( theta ): return theta + 1.0 + torch . randn_like ( theta ) * 0.1 simulator , prior = prepare_for_sbi ( linear_gaussian , prior ) Here, we decide to learn the likelihood directly, but learning the likelihood-to-evidence ratio would also be a viable option. inference = SNLE ( prior = prior ) Now we can run inference, where we first learn the likelihood, which is then in turn used to learn a posterior through variational inference. num_rounds = 2 x_o = torch . zeros ( 3 ,) posteriors = [] proposal = prior for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) likelihood_estimator = inference . append_simulations ( theta , x , ) . 
train () potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () posteriors . append ( posterior ) proposal = posterior Running 500 simulations.: 0%| | 0/500 [00:00 NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior sbi . utils . user_input_checks . prepare_for_sbi ( simulator , prior ) \u00b6 Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. 
Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives a Tensor as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches, and its samples and log-prob evaluations are Tensors.
    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior sbi . inference . base . simulate_for_sbi ( simulator , proposal , num_simulations , num_workers = 1 , simulation_batch_size = 1 , seed = None , show_progress_bar = True ) \u00b6 Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. 
Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x sbi.inference.snpe.snpe_a.SNPE_A ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'mdn_snpe_a' , num_components = 10 , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-A [1]. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. None density_estimator Union[str, Callable] If it is a string (only \u201cmdn_snpe_a\u201d is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated num_components times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. 'mdn_snpe_a' num_components int Number of components of the mixture of Gaussians in the last round. This overrides the num_components value passed to posterior_nn() . 10 device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"mdn_snpe_a\" , num_components : int = 10 , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-A [1]. 
[1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. density_estimator: If it is a string (only \"mdn_snpe_a\" is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated `num_components` times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. num_components: Number of components of the mixture of Gaussians in the last round. This overrides the `num_components` value passed to `posterior_nn()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" # Catch invalid inputs. if not (( density_estimator == \"mdn_snpe_a\" ) or callable ( density_estimator )): raise TypeError ( \"The `density_estimator` passed to SNPE_A needs to be a \" \"callable or the string 'mdn_snpe_a'!\" ) # `num_components` will be used to replicate the Gaussian in the last round. self . _num_components = num_components self . _ran_final_round = False # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_components\" ), ) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_a.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . 
_device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . _device ) ) return self build_posterior ( self , density_estimator = None , prior = None ) \u00b6 Build posterior from the neural density estimator. This method first corrects the estimated density with correct_for_proposal and then returns a DirectPosterior . Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None Returns: Type Description DirectPosterior Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def build_posterior ( self , density_estimator : Optional [ TorchModule ] = None , prior : Optional [ Distribution ] = None , ) -> \"DirectPosterior\" : r \"\"\"Build posterior from the neural density estimator. This method first corrects the estimated density with `correct_for_proposal` and then returns a `DirectPosterior`. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNPE_A(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior wrapped_density_estimator = self . correct_for_proposal ( density_estimator = density_estimator ) self . _posterior = DirectPosterior ( posterior_estimator = wrapped_density_estimator , # type: ignore prior = prior , ) return deepcopy ( self . _posterior ) correct_for_proposal ( self , density_estimator = None ) \u00b6 Build mixture of Gaussians that approximates the posterior. 
Returns a SNPE_A_MDN object, which applies the posthoc-correction required in SNPE-A. Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None Returns: Type Description SNPE_A_MDN Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def correct_for_proposal ( self , density_estimator : Optional [ TorchModule ] = None , ) -> \"SNPE_A_MDN\" : r \"\"\"Build mixture of Gaussians that approximates the posterior. Returns a `SNPE_A_MDN` object, which applies the posthoc-correction required in SNPE-A. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if density_estimator is None : density_estimator = deepcopy ( self . _neural_net ) # PosteriorEstimator.train() also returns a deepcopy, mimic this here # If internal net is used device is defined. device = self . _device else : # Otherwise, infer it from the device of the net parameters. device = str ( next ( density_estimator . parameters ()) . device ) # Set proposal of the density estimator. # This also evokes the z-scoring correction if necessary. if ( self . _proposal_roundwise [ - 1 ] is self . _prior or self . _proposal_roundwise [ - 1 ] is None ): proposal = self . _prior assert isinstance ( proposal , ( MultivariateNormal , utils . BoxUniform ) ), \"\"\"Prior must be `torch.distributions.MultivariateNormal` or `sbi.utils. BoxUniform`\"\"\" else : assert isinstance ( self . _proposal_roundwise [ - 1 ], DirectPosterior ), \"\"\"The proposal you passed to `append_simulations` is neither the prior nor a `DirectPosterior`. SNPE-A currently only supports these scenarios. \"\"\" proposal = self . _proposal_roundwise [ - 1 ] # Create the SNPE_A_MDN wrapped_density_estimator = SNPE_A_MDN ( flow = density_estimator , # type: ignore proposal = proposal , prior = self . _prior , device = device , ) return wrapped_density_estimator get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. 
resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . 
_data_round_index , starting_round ) return theta , x , prior_masks train ( self , final_round = False , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , component_perturbation = 0.005 ) \u00b6 Return density estimator that approximates the proposal posterior. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Parameters: Name Type Description Default final_round bool Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. False training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. required retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None component_perturbation float The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. 0.005 Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
Source code in sbi/inference/snpe/snpe_a.py def train ( self , final_round : bool = False , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , component_perturbation : float = 5e-3 , ) -> nn . Module : r \"\"\"Return density estimator that approximates the proposal posterior. [1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Args: final_round: Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) component_perturbation: The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" assert not retrain_from_scratch , \"\"\"Retraining from scratch is not supported in SNPE-A yet. The reason for this is that, if we reininitialized the density estimator, the z-scoring would change, which would break the posthoc correction. This is a pure implementation issue.\"\"\" kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"final_round\" , \"component_perturbation\" , ), ) # SNPE-A always discards the prior samples. 
kwargs [ \"discard_prior_samples\" ] = True kwargs [ \"force_first_round_loss\" ] = True self . _round = max ( self . _data_round_index ) if final_round : # If there is (will be) only one round, train with Algorithm 2 from [1]. if self . _round == 0 : self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Run Algorithm 2 from [1]. elif not self . _ran_final_round : # Now switch to the specified number of components. This method will # only be used if `retrain_from_scratch=True`. Otherwise, # the MDN will be built from replicating the single-component net for # `num_component` times (via `_expand_mog()`). self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Extend the MDN to the originally desired number of components. self . _expand_mog ( eps = component_perturbation ) else : warnings . warn ( \"You have already run SNPE-A with `final_round=True`. Running it\" \"again with this setting will not allow computing the posthoc\" \"correction applied in SNPE-A. Thus, you will get an error when \" \"calling `.build_posterior()` after training.\" , UserWarning , ) else : # Run Algorithm 1 from [1]. # Wrap the function that builds the MDN such that we can make # sure that there is only one component when running. self . _build_neural_net = partial ( self . _build_neural_net , num_components = 1 ) if final_round : self . _ran_final_round = True return super () . train ( ** kwargs ) sbi.inference.snpe.snpe_c.SNPE_C ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-C / APT [1]. [1] Automatic Posterior Transformation for Likelihood-free Inference , Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488 . This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled: - proposal is a DirectPosterior with density_estimator mdn , as built with utils.sbi.posterior_nn() . - the density estimator is a mdn , as built with utils.sbi.posterior_nn() . - isinstance(prior, MultivariateNormal) (from torch.distributions ) or isinstance(prior, sbi.utils.BoxUniform) Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-C / APT [1]. [1] _Automatic Posterior Transformation for Likelihood-free Inference_, Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488. This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled:
    - proposal is a `DirectPosterior` with density_estimator `mdn`, as built with `sbi.utils.posterior_nn()`.
    - the density estimator is an `mdn`, as built with `sbi.utils.posterior_nn()`.
    - `isinstance(prior, MultivariateNormal)` (from `torch.distributions`) or `isinstance(prior, sbi.utils.BoxUniform)` Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_c.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . 
_device ) ) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'rejection' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'rejection' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior or DirectPosterior . By default, DirectPosterior is used. Only if rejection_sampling_parameters contains proposal , a RejectionPosterior is instantiated. {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior, sbi.inference.posteriors.direct_posterior.DirectPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snpe/snpe_c.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"rejection\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior , DirectPosterior ]: r \"\"\"Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. 
Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior` or `DirectPosterior`. By default, `DirectPosterior` is used. Only if `rejection_sampling_parameters` contains `proposal`, a `RejectionPosterior` is instantiated. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert self . _prior is not None , ( \"You did not pass a prior. You have to pass the prior either at \" \"initialization `inference = SNPE(prior)` or to \" \"`.build_posterior(prior=prior)`.\" ) prior = self . _prior else : utils . check_prior ( prior ) if density_estimator is None : posterior_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : posterior_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator = posterior_estimator , prior = prior , x_o = None , ) if sample_with == \"rejection\" : if \"proposal\" in rejection_sampling_parameters . keys (): self . _posterior = RejectionPosterior ( potential_fn = potential_fn , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) else : self . _posterior = DirectPosterior ( posterior_estimator = posterior_estimator , # type: ignore prior = prior , x_shape = self . _x_shape , device = device , ) elif sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). 
None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. 
Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , force_first_round_loss = False , discard_prior_samples = False , use_combined_loss = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return density estimator that approximates the distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss bool If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False use_combined_loss bool Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
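A minimal usage sketch tying together the `append_simulations()`, `train()`, and `build_posterior()` calls documented above. The two-parameter `prior`, the toy `simulator`, and the observation `x_o` are illustrative assumptions, not part of this reference:

```python
import torch
from sbi.inference import SNPE_C
from sbi.utils import BoxUniform

# Toy setup (assumptions): a 2-d parameter space and a trivial noisy simulator.
prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)

x_o = torch.zeros(1, 2)

inference = SNPE_C(prior=prior, density_estimator="maf")

proposal = prior
for round_idx in range(2):
    theta = proposal.sample((500,))
    x = simulator(theta)
    # Round 0: parameters come from the prior, so `proposal=None`.
    # Later rounds: pass the previous posterior, which triggers the SNPE-C loss.
    estimator = inference.append_simulations(
        theta, x, proposal=None if round_idx == 0 else proposal
    ).train(num_atoms=10)
    posterior = inference.build_posterior(estimator)
    proposal = posterior.set_default_x(x_o)

samples = posterior.sample((1000,), x=x_o)
```

With `density_estimator="mdn"` and a `BoxUniform` (or multivariate Gaussian) prior, the criteria listed above are fulfilled and later rounds use the non-atomic loss instead of the atomic one.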
Source code in sbi/inference/snpe/snpe_c.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , force_first_round_loss : bool = False , discard_prior_samples : bool = False , use_combined_loss : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return density estimator that approximates the distribution $p(\\theta|x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. use_combined_loss: Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. self . _num_atoms = num_atoms self . _use_combined_loss = use_combined_loss kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_atoms\" , \"use_combined_loss\" ), ) self . _round = max ( self . _data_round_index ) if self . _round > 0 : # Set the proposal to the last proposal that was passed by the user. For # atomic SNPE, it does not matter what the proposal is. 
For non-atomic # SNPE, we only use the latest data that was passed, i.e. the one from the # last proposal. proposal = self . _proposal_roundwise [ - 1 ] self . use_non_atomic_loss = ( isinstance ( proposal , DirectPosterior ) and isinstance ( proposal . posterior_estimator . _distribution , mdn ) and isinstance ( self . _neural_net . _distribution , mdn ) and check_dist_class ( self . _prior , class_to_check = ( Uniform , MultivariateNormal ) )[ 0 ] ) algorithm = \"non-atomic\" if self . use_non_atomic_loss else \"atomic\" print ( f \"Using SNPE-C with { algorithm } loss\" ) if self . use_non_atomic_loss : # Take care of z-scoring, pre-compute and store prior terms. self . _set_state_for_mog_proposal () return super () . train ( ** kwargs ) sbi.inference.snle.snle_a.SNLE_A ( LikelihoodEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snle/snle_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNLE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNLE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description LikelihoodEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snle/snle_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"LikelihoodEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNLE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNLE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNLE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood \\(p(x|\\theta)\\) . The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snle/snle_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood $p(x|\\theta)$. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNLE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : likelihood_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : likelihood_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator = likelihood_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. 
False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snle/snle_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
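For comparison with the SNPE variants, a minimal SNLE-A sketch, again assuming an illustrative `prior`, `simulator`, and observation `x_o`. Since SNLE only yields an unnormalized posterior, `build_posterior()` defaults to MCMC sampling:

```python
import torch
from sbi.inference import SNLE_A
from sbi.utils import BoxUniform

# Toy setup (assumptions), mirroring the SNPE-C sketch above.
prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)

x_o = torch.zeros(1, 2)

inference = SNLE_A(prior=prior, density_estimator="maf")

theta = prior.sample((1000,))
x = simulator(theta)
likelihood_estimator = inference.append_simulations(theta, x).train()

# `mcmc_parameters` are forwarded to `MCMCPosterior`; `thin` is one of its kwargs.
posterior = inference.build_posterior(
    sample_with="mcmc",
    mcmc_method="slice_np",
    mcmc_parameters={"thin": 5},
)
samples = posterior.sample((1000,), x=x_o)
```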
Source code in sbi/inference/snle/snle_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) inherited \u00b6 Train the density estimator to learn the distribution \\(p(x|\\theta)\\) . Parameters: Name Type Description Default resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Flow Density estimator that has learned the distribution \\(p(x|\\theta)\\) . Source code in sbi/inference/snle/snle_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> flows . Flow : r \"\"\"Train the density estimator to learn the distribution $p(x|\\theta)$. Args: resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss after the training. 
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that has learned the distribution $p(x|\\theta)$. \"\"\" # Load data from most recent round. self . _round = max ( self . _data_round_index ) # Starting index for the training set (1 = discard round-0 samples). start_idx = int ( discard_prior_samples and self . _round > 0 ) train_loader , val_loader = self . get_dataloaders ( start_idx , training_batch_size , validation_fraction , resume_training , dataloader_kwargs = dataloader_kwargs , ) # First round or if retraining from scratch: # Call the `self._build_neural_net` with the rounds' thetas and xs as # arguments, which will build the neural network # This is passed into NeuralPosterior, to create a neural posterior which # can `sample()` and `log_prob()`. The network is accessible via `.net`. if self . _neural_net is None or retrain_from_scratch : # Get theta,x to initialize NN theta , x , _ = self . get_simulations ( starting_round = start_idx ) # Use only training data for building the neural net (z-scoring transforms) self . _neural_net = self . _build_neural_net ( theta [ self . train_indices ] . to ( \"cpu\" ), x [ self . train_indices ] . to ( \"cpu\" ), ) self . _x_shape = x_shape_from_simulation ( x . to ( \"cpu\" )) del theta , x assert ( len ( self . _x_shape ) < 3 ), \"SNLE cannot handle multi-dimensional simulator output.\" self . _neural_net . to ( self . _device ) if not resume_training : self . optimizer = optim . Adam ( list ( self . _neural_net . parameters ()), lr = learning_rate , ) self . epoch , self . _val_log_prob = 0 , float ( \"-Inf\" ) while self . epoch <= max_num_epochs and not self . _converged ( self . epoch , stop_after_epochs ): # Train for a single epoch. self . _neural_net . train () train_log_probs_sum = 0 for batch in train_loader : self . optimizer . zero_grad () theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. train_losses = self . _loss ( theta = theta_batch , x = x_batch ) train_loss = torch . mean ( train_losses ) train_log_probs_sum -= train_losses . sum () . item () train_loss . backward () if clip_max_norm is not None : clip_grad_norm_ ( self . _neural_net . parameters (), max_norm = clip_max_norm , ) self . optimizer . step () self . epoch += 1 train_log_prob_average = train_log_probs_sum / ( len ( train_loader ) * train_loader . batch_size # type: ignore ) self . _summary [ \"training_log_probs\" ] . append ( train_log_prob_average ) # Calculate validation performance. self . _neural_net . eval () val_log_prob_sum = 0 with torch . no_grad (): for batch in val_loader : theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. val_losses = self . _loss ( theta = theta_batch , x = x_batch ) val_log_prob_sum -= val_losses . sum () . item () # Take mean over all validation samples. self . _val_log_prob = val_log_prob_sum / ( len ( val_loader ) * val_loader . batch_size # type: ignore ) # Log validation log prob for every epoch. self . _summary [ \"validation_log_probs\" ] . append ( self . _val_log_prob ) self . _maybe_show_progress ( self . _show_progress_bars , self . epoch ) self . _report_convergence_at_end ( self . epoch , stop_after_epochs , max_num_epochs ) # Update summary. self . _summary [ \"epochs_trained\" ] . append ( self . epoch ) self . 
_summary [ \"best_validation_log_prob\" ] . append ( self . _best_val_log_prob ) # Update TensorBoard and summary dict. self . _summarize ( round_ = self . _round ) # Update description for progress bar. if show_train_summary : print ( self . _describe_round ( self . _round , self . _summary )) # Avoid keeping the gradients in the resulting network, which can # cause memory leakage when benchmarking. self . _neural_net . zero_grad ( set_to_none = True ) return deepcopy ( self . _neural_net ) sbi.inference.snre.snre_a.SNRE_A ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 AALR[1], here known as SNRE_A. [1] Likelihood-free MCMC with Amortized Approximate Likelihood Ratios , Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"AALR[1], here known as SNRE_A. [1] _Likelihood-free MCMC with Amortized Approximate Likelihood Ratios_, Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 
summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
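Example (illustrative sketch): the `sample_with` options of `build_posterior` documented above can be exercised as below. The toy data and the "observation" `x_o` are assumptions for illustration; the calls follow the documented signatures.

```python
import torch
from sbi.utils import BoxUniform
from sbi.inference import SNRE_A

# Toy setup (illustrative assumption only).
prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))
theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)
x_o = x[0]  # hypothetical "observation"

inference = SNRE_A(prior=prior)
inference.append_simulations(theta, x).train()

# MCMC sampling with the numpy slice sampler (the documented default).
mcmc_posterior = inference.build_posterior(sample_with="mcmc", mcmc_method="slice_np")
mcmc_samples = mcmc_posterior.sample((1000,), x=x_o)

# Rejection sampling uses the prior as the proposal distribution.
rejection_posterior = inference.build_posterior(sample_with="rejection")
rejection_samples = rejection_posterior.sample((1000,), x=x_o)

# Variational inference: the returned VIPosterior is fitted for a given x
# before sampling.
vi_posterior = inference.build_posterior(sample_with="vi", vi_method="rKL")
vi_posterior.set_default_x(x_o)
vi_posterior.train()
vi_samples = vi_posterior.sample((1000,))
```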
Source code in sbi/inference/snre/snre_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , loss_kwargs = {}) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None loss_kwargs Dict[str, Any] Additional or updated kwargs to be passed to the self._loss fn. {} Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , loss_kwargs : Dict [ str , Any ] = {}, ) -> nn . 
Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) loss_kwargs: Additional or updated kwargs to be passed to the self._loss fn. Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" # AALR is defined for `num_atoms=2`. # Proxy to `super().__call__` to ensure right parameter. kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs , num_atoms = 2 ) sbi.inference.snre.snre_b.SNRE_B ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 SRE[1], here known as SNRE_B. [1] On Contrastive Learning for Likelihood-free Inference , Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. 
True Source code in sbi/inference/snre/snre_b.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SRE[1], here known as SNRE_B. [1] _On Contrastive Learning for Likelihood-free Inference_, Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_b.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). 
Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . 
{} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_b.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . 
_posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_b.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . 
tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_b.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. 
False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_b.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs ) sbi.inference.snre.snre_c.SNRE_C ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within sbi . NRE-C: (1) like SNRE_B, features a \u201cmulticlass\u201d loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) , accessed through .potential() within sbi , to be exact at optimum. 
This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data \\(x\\) . Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data \\(x\\) in rounds after the first. [1] Contrastive Neural Ratio Estimation , Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within `sbi`. NRE-C: (1) like SNRE_B, features a \"multiclass\" loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio $p(\\theta,x)/p(\\theta)p(x)$, accessed through `.potential()` within `sbi`, to be exact at optimum. This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data $x$. Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data $x$ in rounds after the first. [1] _Contrastive Neural Ratio Estimation_, Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_c.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
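Example (illustrative sketch): when GPU memory is tight, the `data_device` argument described here lets a large training set live in system memory while the network trains on the GPU. The snippet assumes a CUDA device is available and uses a hypothetical toy simulator.

```python
import torch
from sbi.utils import BoxUniform
from sbi.inference import SNRE_C

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))
theta = prior.sample((100_000,))
x = theta + 0.1 * torch.randn_like(theta)  # hypothetical large simulation set

# Train on the GPU, but keep the dataset on the CPU.
inference = SNRE_C(prior=prior, device="cuda")
_ = inference.append_simulations(theta, x, data_device="cpu")
```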
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_c.py def build_posterior ( self , density_estimator : Optional [ nn . 
Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . 
_posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . 
DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_classes = 5 , gamma = 1.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_classes int Number of theta to classify against, corresponds to \\(K\\) in Contrastive Neural Ratio Estimation . Minimum value is 1. Similar to num_atoms for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters NRE-C \u201csees\u201d is \\(2K-1\\) or 2 * num_classes - 1 divided between two loss terms. 5 gamma float Determines the relative weight of the sum of all \\(K\\) dependently drawn classes against the marginally drawn one. Specifically, \\(p(y=k) :=p_K\\) , \\(p(y=0) := p_0\\) , \\(p_0 = 1 - K p_K\\) , and finally \\(\\gamma := K p_K / p_0\\) . 1.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. 
If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_c.py def train ( self , num_classes : int = 5 , gamma : float = 1.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_classes: Number of theta to classify against, corresponds to $K$ in _Contrastive Neural Ratio Estimation_. Minimum value is 1. Similar to `num_atoms` for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters `NRE-C` \"sees\" is $2K-1$ or `2 * num_classes - 1` divided between two loss terms. gamma: Determines the relative weight of the sum of all $K$ dependently drawn classes against the marginally drawn one. Specifically, $p(y=k) :=p_K$, $p(y=0) := p_0$, $p_0 = 1 - K p_K$, and finally $\\gamma := K p_K / p_0$. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. 
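Example (illustrative sketch): the `num_classes` and `gamma` arguments of `SNRE_C.train` can be passed as below. The toy data are assumptions for illustration; the call follows the documented signature.

```python
import torch
from sbi.utils import BoxUniform
from sbi.inference import SNRE_C

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))
theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)  # hypothetical toy simulator

inference = SNRE_C(prior=prior)
# With num_classes=5 the classifier "sees" 2 * 5 - 1 = 9 alternative parameter
# sets per example; gamma weights the K dependently drawn classes against the
# single marginally drawn one.
_ = inference.append_simulations(theta, x).train(num_classes=5, gamma=1.0)
posterior = inference.build_posterior(sample_with="mcmc")
```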
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"num_atoms\" ] = kwargs . pop ( \"num_classes\" ) + 1 kwargs [ \"loss_kwargs\" ] = { \"gamma\" : kwargs . pop ( \"gamma\" )} return super () . train ( ** kwargs ) sbi.inference.snre.bnre.BNRE ( SNRE_A ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations \\((\\theta, x)\\) , which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/bnre.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations $(\\theta, x)$, which can thus be used for shape inference and potentially for z-scoring. 
It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/bnre.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/bnre.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/bnre.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
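Example (a minimal sketch, not part of the reference itself; the uniform prior and the Gaussian stand-in simulator below are illustrative assumptions): after appending simulations, the stored training data can be inspected via get_simulations :

    import torch
    from sbi.inference import BNRE
    from sbi.utils import BoxUniform

    prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
    theta = prior.sample((1000,))
    x = theta + 0.1 * torch.randn_like(theta)  # stand-in simulator

    inference = BNRE(prior=prior)
    inference.append_simulations(theta, x)

    # All parameters, simulations, and prior masks stored since round 0.
    theta_all, x_all, prior_masks = inference.get_simulations(starting_round=0)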
Source code in sbi/inference/snre/bnre.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , regularization_strength = 100.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default regularization_strength float The multiplicative coefficient applied to the balancing regularizer ( \\(\\lambda\\) ). 100.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . 
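Example (a minimal sketch under the same illustrative assumptions as above; the toy prior and stand-in simulator are not part of sbi ): train is typically chained with append_simulations and followed by build_posterior :

    import torch
    from sbi.inference import BNRE
    from sbi.utils import BoxUniform

    prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
    theta = prior.sample((2000,))
    x = theta + 0.1 * torch.randn_like(theta)  # stand-in simulator

    inference = BNRE(prior=prior)
    ratio_estimator = inference.append_simulations(theta, x).train(
        regularization_strength=100.0,  # lambda, the balancing regularizer weight
    )
    posterior = inference.build_posterior(sample_with='mcmc', mcmc_method='slice_np')
    samples = posterior.sample((100,), x=x[:1])

Per the BNRE paper, larger values of regularization_strength enforce the balancing condition more strictly and tend to yield more conservative posterior approximations.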
Source code in sbi/inference/snre/bnre.py def train ( self , regularization_strength : float = 100.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: regularization_strength: The multiplicative coefficient applied to the balancing regularizer ($\\lambda$). training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"loss_kwargs\" ] = { \"regularization_strength\" : kwargs . pop ( \"regularization_strength\" ) } return super () . train ( ** kwargs ) sbi.inference.abc.mcabc.MCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_simulations , eps = None , quantile = None , lra = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 , kde = False , kde_kwargs = {}, return_summary = False ) special \u00b6 Run MCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_simulations int Number of simulations to run. required eps Optional[float] Acceptance threshold \\(\\epsilon\\) for distance between observed and simulated data. None quantile Optional[float] Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or eps have to be passed. None lra bool Whether to run linear regression adjustment as in Beaumont et al. 
2002 False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} return_summary bool Whether to return the distances and data corresponding to the accepted parameters. False Returns: Type Description theta (if kde False) accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. Source code in sbi/inference/abc/mcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_simulations : int , eps : Optional [ float ] = None , quantile : Optional [ float ] = None , lra : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, return_summary : bool = False , ) -> Union [ Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ], Tensor , KDEWrapper ]: r \"\"\"Run MCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_simulations: Number of simulations to run. eps: Acceptance threshold $\\epsilon$ for distance between observed and simulated data. quantile: Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or `eps` have to be passed. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details return_summary: Whether to return the distances and data corresponding to the accepted parameters. Returns: theta (if kde False): accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. \"\"\" # Exactly one of eps or quantile need to be passed. 
assert ( eps is not None ) ^ ( quantile is not None ), \"Eps or quantile must be passed, but not both.\" # Run SASS and change the simulator and x_o accordingly. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) num_simulations -= num_pilot_simulations pilot_theta = self . prior . sample (( num_pilot_simulations ,)) pilot_x = self . _batched_simulator ( pilot_theta ) sass_transform = self . get_sass_transform ( pilot_theta , pilot_x , sass_expansion_degree ) simulator = lambda theta : sass_transform ( self . _batched_simulator ( theta )) x_o = sass_transform ( x_o ) else : simulator = self . _batched_simulator # Simulate and calculate distances. theta = self . prior . sample (( num_simulations ,)) x = simulator ( theta ) # Infer shape of x to test and set x_o. self . x_shape = x [ 0 ] . unsqueeze ( 0 ) . shape self . x_o = process_x ( x_o , self . x_shape ) distances = self . distance ( self . x_o , x ) # Select based on acceptance threshold epsilon. if eps is not None : is_accepted = distances < eps num_accepted = is_accepted . sum () . item () assert num_accepted > 0 , f \"No parameters accepted, eps= { eps } too small\" theta_accepted = theta [ is_accepted ] distances_accepted = distances [ is_accepted ] x_accepted = x [ is_accepted ] # Select based on quantile on sorted distances. elif quantile is not None : num_top_samples = int ( num_simulations * quantile ) sort_idx = torch . argsort ( distances ) theta_accepted = theta [ sort_idx ][: num_top_samples ] distances_accepted = distances [ sort_idx ][: num_top_samples ] x_accepted = x [ sort_idx ][: num_top_samples ] else : raise ValueError ( \"One of epsilon or quantile has to be passed.\" ) # Maybe adjust theta with LRA. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) final_theta = self . run_lra ( theta_accepted , x_accepted , observation = self . x_o ) else : final_theta = theta_accepted if kde : self . logger . info ( f \"\"\"KDE on { final_theta . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) kde_dist = get_kde ( final_theta , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( theta = final_theta , distances = distances_accepted , x = x_accepted ), ) else : return kde_dist elif return_summary : return final_theta , dict ( distances = distances_accepted , x = x_accepted ) else : return final_theta __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True ) special \u00b6 Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. 
Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/abc/mcabc.py def __init__ ( self , simulator : Callable , prior , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , ): r \"\"\"Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()` and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance function built from the passed string; if distance_type is already a callable, it is returned unchanged. Source code in sbi/inference/abc/mcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance function built from the passed string; if distance_type is already a callable, it is returned unchanged.
\"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/mcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/mcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . 
fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted sbi.inference.abc.smcabc.SMCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_particles , num_initial_pop , num_simulations , epsilon_decay , distance_based_decay = False , ess_min = None , kernel_variance_scale = 1.0 , use_last_pop_samples = True , return_summary = False , kde = False , kde_kwargs = {}, kde_sample_weights = False , lra = False , lra_with_weights = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 ) special \u00b6 Run SMCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_particles int Number of particles in each population. required num_initial_pop int Number of simulations used for initial population. required num_simulations int Total number of possible simulations. required epsilon_decay float Factor with which the acceptance threshold \\(\\epsilon\\) decays. required distance_based_decay bool Whether the \\(\\epsilon\\) decay is constant over populations or calculated from the previous populations distribution of distances. False ess_min Optional[float] Threshold of effective sampling size for resampling weights. Not used when None (default). None kernel_variance_scale float Factor for scaling the perturbation kernel variance. 1.0 use_last_pop_samples bool Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. True lra bool Whether to run linear regression adjustment as in Beaumont et al. 2002 False lra_with_weights bool Whether to run lra as weighted linear regression with SMC weights False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} kde_sample_weights bool Whether perform weighted KDE with SMC weights or on raw particles. False return_summary bool Whether to return a dictionary with all accepted particles, weights, etc. at the end. False Returns: Type Description theta (if kde False) accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. 
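Example (a minimal sketch; the toy prior, stand-in simulator, and budget numbers are illustrative assumptions, and SMCABC is assumed to be importable from sbi.inference ):

    import torch
    from sbi.inference import SMCABC
    from sbi.utils import BoxUniform

    prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

    def simulator(theta):
        # Stand-in simulator: Gaussian noise around theta.
        return theta + 0.1 * torch.randn_like(theta)

    x_o = torch.zeros(1, 2)
    smc = SMCABC(simulator, prior, distance='l2', simulation_batch_size=100)
    particles, summary = smc(
        x_o,
        num_particles=1000,
        num_initial_pop=5000,
        num_simulations=20000,
        epsilon_decay=0.5,
        distance_based_decay=True,
        return_summary=True,
    )

With kde=True , a KDE object fitted on the final particles is returned instead of the raw particles.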
Source code in sbi/inference/abc/smcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_particles : int , num_initial_pop : int , num_simulations : int , epsilon_decay : float , distance_based_decay : bool = False , ess_min : Optional [ float ] = None , kernel_variance_scale : float = 1.0 , use_last_pop_samples : bool = True , return_summary : bool = False , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, kde_sample_weights : bool = False , lra : bool = False , lra_with_weights : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , ) -> Union [ Tensor , KDEWrapper , Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ]]: r \"\"\"Run SMCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_particles: Number of particles in each population. num_initial_pop: Number of simulations used for initial population. num_simulations: Total number of possible simulations. epsilon_decay: Factor with which the acceptance threshold $\\epsilon$ decays. distance_based_decay: Whether the $\\epsilon$ decay is constant over populations or calculated from the previous populations distribution of distances. ess_min: Threshold of effective sampling size for resampling weights. Not used when None (default). kernel_variance_scale: Factor for scaling the perturbation kernel variance. use_last_pop_samples: Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 lra_with_weights: Whether to run lra as weighted linear regression with SMC weights sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details kde_sample_weights: Whether perform weighted KDE with SMC weights or on raw particles. return_summary: Whether to return a dictionary with all accepted particles, weights, etc. at the end. Returns: theta (if kde False): accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. \"\"\" pop_idx = 0 self . num_simulations = num_simulations # Pilot run for SASS. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) sass_transform = self . run_sass_set_xo ( num_particles , num_pilot_simulations , x_o , lra , sass_expansion_degree ) # Udpate simulator and xo x_o = sass_transform ( self . x_o ) def sass_simulator ( theta ): self . simulation_counter += theta . shape [ 0 ] return sass_transform ( self . 
_batched_simulator ( theta )) self . _simulate_with_budget = sass_simulator # run initial population particles , epsilon , distances , x = self . _set_xo_and_sample_initial_population ( x_o , num_particles , num_initial_pop ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) self . logger . info ( ( f \"population= { pop_idx } , eps= { epsilon } , ess= { 1.0 } , \" f \"num_sims= { num_initial_pop } \" ) ) all_particles = [ particles ] all_log_weights = [ log_weights ] all_distances = [ distances ] all_epsilons = [ epsilon ] all_x = [ x ] while self . simulation_counter < self . num_simulations : pop_idx += 1 # Decay based on quantile of distances from previous pop. if distance_based_decay : epsilon = self . _get_next_epsilon ( all_distances [ pop_idx - 1 ], epsilon_decay ) # Constant decay. else : epsilon *= epsilon_decay # Get kernel variance from previous pop. self . kernel_variance = self . get_kernel_variance ( all_particles [ pop_idx - 1 ], torch . exp ( all_log_weights [ pop_idx - 1 ]), samples_per_dim = 500 , kernel_variance_scale = kernel_variance_scale , ) particles , log_weights , distances , x = self . _sample_next_population ( particles = all_particles [ pop_idx - 1 ], log_weights = all_log_weights [ pop_idx - 1 ], distances = all_distances [ pop_idx - 1 ], epsilon = epsilon , x = all_x [ pop_idx - 1 ], use_last_pop_samples = use_last_pop_samples , ) # Resample population if effective sampling size is too small. if ess_min is not None : particles , log_weights = self . resample_if_ess_too_small ( particles , log_weights , ess_min , pop_idx ) self . logger . info ( ( f \"population= { pop_idx } done: eps= { epsilon : .6f } ,\" f \" num_sims= { self . simulation_counter } .\" ) ) # collect results all_particles . append ( particles ) all_log_weights . append ( log_weights ) all_distances . append ( distances ) all_epsilons . append ( epsilon ) all_x . append ( x ) # Maybe run LRA and adjust weights. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) adjusted_particles , adjusted_weights = self . run_lra_update_weights ( particles = all_particles [ - 1 ], xs = all_x [ - 1 ], observation = process_x ( x_o ), log_weights = all_log_weights [ - 1 ], lra_with_weights = lra_with_weights , ) final_particles = adjusted_particles else : final_particles = all_particles [ - 1 ] if kde : self . logger . info ( f \"\"\"KDE on { final_particles . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) # Maybe get particles weights from last population for weighted KDE. if kde_sample_weights : kde_kwargs [ \"sample_weights\" ] = all_log_weights [ - 1 ] . exp () kde_dist = get_kde ( final_particles , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return kde_dist if return_summary : return ( final_particles , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return final_particles __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True , kernel = 'gaussian' , algorithm_variant = 'C' ) special \u00b6 Sequential Monte Carlo Approximate Bayesian Computation. 
We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True kernel Optional[str] Perturbation kernel. 'gaussian' algorithm_variant str Indicating the choice of algorithm variant, A, B, or C. 'C' Source code in sbi/inference/abc/smcabc.py def __init__ ( self , simulator : Callable , prior : Distribution , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , kernel : Optional [ str ] = \"gaussian\" , algorithm_variant : str = \"C\" , ): r \"\"\"Sequential Monte Carlo Approximate Bayesian Computation. We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 
show_progress_bars: Whether to show a progressbar during simulation and sampling. kernel: Perturbation kernel. algorithm_variant: Indicating the choice of algorithm variant, A, B, or C. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) kernels = ( \"gaussian\" , \"uniform\" ) assert ( kernel in kernels ), f \"Kernel ' { kernel } ' not supported. Choose one from { kernels } .\" self . kernel = kernel algorithm_variants = ( \"A\" , \"B\" , \"C\" ) assert algorithm_variant in algorithm_variants , ( f \"SMCABC variant ' { algorithm_variant } ' not supported, choose one from\" \" {algorithm_variants} .\" ) self . algorithm_variant = algorithm_variant self . distance_to_x0 = None self . simulation_counter = 0 self . num_simulations = 0 # Define simulator that keeps track of budget. def simulate_with_budget ( theta ): self . simulation_counter += theta . shape [ 0 ] return self . _batched_simulator ( theta ) self . _simulate_with_budget = simulate_with_budget get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance function built from the passed string; if distance_type is already a callable, it is returned unchanged. Source code in sbi/inference/abc/smcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance function built from the passed string; if distance_type is already a callable, it is returned unchanged. \"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_new_kernel ( self , thetas ) \u00b6 Return new kernel distribution for a given set of parameters. Source code in sbi/inference/abc/smcabc.py def get_new_kernel ( self , thetas : Tensor ) -> Distribution : \"\"\"Return new kernel distribution for a given set of parameters.\"\"\" if self . kernel == \"gaussian\" : assert self . kernel_variance .
ndim == 2 return MultivariateNormal ( loc = thetas , covariance_matrix = self . kernel_variance ) elif self . kernel == \"uniform\" : low = thetas - self . kernel_variance high = thetas + self . kernel_variance # Move batch shape to event shape to get Uniform that is multivariate in # parameter dimension. return Uniform ( low = low , high = high ) . to_event ( 1 ) else : raise ValueError ( f \"Kernel, ' { self . kernel } ' not supported.\" ) get_particle_ranges ( self , particles , weights , samples_per_dim = 100 ) \u00b6 Return range of particles in each parameter dimension. Source code in sbi/inference/abc/smcabc.py def get_particle_ranges ( self , particles : Tensor , weights : Tensor , samples_per_dim : int = 100 ) -> Tensor : \"\"\"Return range of particles in each parameter dimension.\"\"\" # get weighted samples samples = self . sample_from_population_with_weights ( particles , weights , num_samples = samples_per_dim * particles . shape [ 1 ], ) # Variance spans the range of particles for every dimension. particle_ranges = samples . max ( 0 ) . values - samples . min ( 0 ) . values assert particle_ranges . ndim < 2 return particle_ranges get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statistics function. Running weighted linear regression as in Fearnhead & Prangle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/smcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statistics function. Running weighted linear regression as in Fearnhead & Prangle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform resample_if_ess_too_small ( self , particles , log_weights , ess_min , pop_idx ) \u00b6 Return resampled particles and uniform weights if effective sampling size is too small. Source code in sbi/inference/abc/smcabc.py def resample_if_ess_too_small ( self , particles : Tensor , log_weights : Tensor , ess_min : float , pop_idx : int , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return resampled particles and uniform weights if effective sampling size is too small. \"\"\" num_particles = particles . shape [ 0 ] ess = ( 1 / torch . sum ( torch .
resample_if_ess_too_small ( self , particles , log_weights , ess_min , pop_idx ) \u00b6 Return resampled particles and uniform weights if the effective sample size is too small. Source code in sbi/inference/abc/smcabc.py def resample_if_ess_too_small ( self , particles : Tensor , log_weights : Tensor , ess_min : float , pop_idx : int , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return resampled particles and uniform weights if the effective sample size is too small. \"\"\" num_particles = particles . shape [ 0 ] ess = ( 1 / torch . sum ( torch . exp ( 2.0 * log_weights ), dim = 0 )) / num_particles # Resampling of weights for low ESS only for Sisson et al. 2007. if ess < ess_min : self . logger . info ( f \"ESS= { ess : .2f } too low, resampling pop { pop_idx } ...\" ) # First resample, then set to uniform weights as in Sisson et al. 2007. particles = self . sample_from_population_with_weights ( particles , torch . exp ( log_weights ), num_samples = num_particles ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) return particles , log_weights run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/smcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted run_lra_update_weights ( self , particles , xs , observation , log_weights , lra_with_weights ) \u00b6 Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. Source code in sbi/inference/abc/smcabc.py def run_lra_update_weights ( self , particles : Tensor , xs : Tensor , observation : Tensor , log_weights : Tensor , lra_with_weights : bool , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. \"\"\" adjusted_particles = self . run_lra ( theta = particles , x = xs , observation = observation , sample_weight = log_weights . exp () if lra_with_weights else None , ) # Update SMC weights with LRA adjusted weights adjusted_log_weights = self . _calculate_new_log_weights ( new_particles = adjusted_particles , old_particles = particles , old_log_weights = log_weights , ) return adjusted_particles , adjusted_log_weights run_sass_set_xo ( self , num_particles , num_pilot_simulations , x_o , lra = False , sass_expansion_degree = 1 ) \u00b6 Return transform for semi-automatic summary statistics. Runs a single round of rejection ABC with a fixed budget and accepts num_particles simulations to run the regression for SASS. Sets self.x_o once the x_shape can be derived from simulations. Source code in sbi/inference/abc/smcabc.py def run_sass_set_xo ( self , num_particles : int , num_pilot_simulations : int , x_o , lra : bool = False , sass_expansion_degree : int = 1 , ) -> Callable : \"\"\"Return transform for semi-automatic summary statistics. Runs a single round of rejection ABC with a fixed budget and accepts num_particles simulations to run the regression for SASS. Sets self.x_o once the x_shape can be derived from simulations. \"\"\" ( pilot_particles , _ , _ , pilot_xs , ) = self . 
_set_xo_and_sample_initial_population ( x_o , num_particles , num_pilot_simulations ) # Adjust with LRA. if lra : pilot_particles = self . run_lra ( pilot_particles , pilot_xs , self . x_o ) sass_transform = self . get_sass_transform ( pilot_particles , pilot_xs , expansion_degree = sass_expansion_degree , sample_weight = None , ) return sass_transform sample_from_population_with_weights ( particles , weights , num_samples = 1 ) staticmethod \u00b6 Return samples from particles sampled with weights. Source code in sbi/inference/abc/smcabc.py @staticmethod def sample_from_population_with_weights ( particles : Tensor , weights : Tensor , num_samples : int = 1 ) -> Tensor : \"\"\"Return samples from particles sampled with weights.\"\"\" # define multinomial with weights as probs multi = Multinomial ( probs = weights ) # sample num samples, with replacement samples = multi . sample ( sample_shape = torch . Size (( num_samples ,))) # get indices of success trials indices = torch . where ( samples )[ 1 ] # return those indices from trace return particles [ indices ] Posteriors \u00b6 sbi.inference.posteriors.direct_posterior.DirectPosterior ( NeuralPosterior ) \u00b6 Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , posterior_estimator , prior , max_sampling_batch_size = 10000 , device = None , x_shape = None , enable_transform = True ) special \u00b6 Parameters: Name Type Description Default prior Distribution Prior distribution with .log_prob() and .sample() . required posterior_estimator Flow The trained neural posterior. required max_sampling_batch_size int Batchsize of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None enable_transform bool Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for theta_transform . True Source code in sbi/inference/posteriors/direct_posterior.py def __init__ ( self , posterior_estimator : flows . Flow , prior : Distribution , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , enable_transform : bool = True , ): \"\"\" Args: prior: Prior distribution with `.log_prob()` and `.sample()`. posterior_estimator: The trained neural posterior. max_sampling_batch_size: Batchsize of samples being drawn from the proposal at every iteration. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. 
x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. enable_transform: Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for `theta_transform`. \"\"\" # Because `DirectPosterior` does not take the `potential_fn` as input, it # builds it itself. The `potential_fn` and `theta_transform` are used only for # obtaining the MAP. check_prior ( prior ) potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator , prior , x_o = None , enable_transform = enable_transform , ) super () . __init__ ( potential_fn = potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . prior = prior self . posterior_estimator = posterior_estimator self . max_sampling_batch_size = max_sampling_batch_size self . _leakage_density_correction_factor = None self . _purpose = \"\"\"It samples the posterior network and rejects samples that lie outside of the prior bounds.\"\"\" leakage_correction ( self , x , num_rejection_samples = 10000 , force_update = False , show_progress_bars = False , rejection_sampling_batch_size = 10000 ) \u00b6 Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever log_prob is called and norm_posterior=True . Here, it is estimated only once for self.default_x and saved for later. We re-evaluate only whenever a new x is passed. Parameters: Name Type Description Default num_rejection_samples int Number of samples used to estimate correction factor. 10000 show_progress_bars bool Whether to show a progress bar during sampling. False rejection_sampling_batch_size int Batch size for rejection sampling. 10000 Returns: Type Description Tensor Saved or newly-estimated correction factor (as a scalar Tensor ). Source code in sbi/inference/posteriors/direct_posterior.py @torch . no_grad () def leakage_correction ( self , x : Tensor , num_rejection_samples : int = 10_000 , force_update : bool = False , show_progress_bars : bool = False , rejection_sampling_batch_size : int = 10_000 , ) -> Tensor : r \"\"\"Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever `log_prob` is called and `norm_posterior=True`. Here, it is estimated only once for `self.default_x` and saved for later. We re-evaluate only whenever a new `x` is passed. Arguments: num_rejection_samples: Number of samples used to estimate correction factor. show_progress_bars: Whether to show a progress bar during sampling. rejection_sampling_batch_size: Batch size for rejection sampling. Returns: Saved or newly-estimated correction factor (as a scalar `Tensor`). \"\"\" def acceptance_at ( x : Tensor ) -> Tensor : return accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . 
prior , theta ), num_samples = num_rejection_samples , show_progress_bars = show_progress_bars , sample_for_correction_factor = True , max_sampling_batch_size = rejection_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, )[ 1 ] # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _leakage_density_correction_factor is None if is_new_x : # Calculate at x; don't save. return acceptance_at ( x ) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . default_x is not None self . _leakage_density_correction_factor = acceptance_at ( self . default_x ) return self . _leakage_density_correction_factor # type: ignore log_prob ( self , theta , x = None , norm_posterior = True , track_gradients = False , leakage_correction_params = None ) \u00b6 Returns the log-probability of the posterior \\(p(\\theta|x)\\) . Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required norm_posterior bool Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here norm_posterior=False . The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. True track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False leakage_correction_params Optional[dict] A dict of keyword arguments to override the default values of leakage_correction() . Possible options are: num_rejection_samples , force_update , show_progress_bars , and rejection_sampling_batch_size . These parameters only have an effect if norm_posterior=True . None Returns: Type Description Tensor (len(\u03b8),) -shaped log posterior probability \\(\\log p(\\theta|x)\\) for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. Source code in sbi/inference/posteriors/direct_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , norm_posterior : bool = True , track_gradients : bool = False , leakage_correction_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of the posterior $p(\\theta|x)$. Args: theta: Parameters $\\theta$. norm_posterior: Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here `norm_posterior=False`. The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. leakage_correction_params: A `dict` of keyword arguments to override the default values of `leakage_correction()`. Possible options are: `num_rejection_samples`, `force_update`, `show_progress_bars`, and `rejection_sampling_batch_size`. These parameters only have an effect if `norm_posterior=True`. 
Returns: `(len(\u03b8),)`-shaped log posterior probability $\\log p(\\theta|x)$ for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. \"\"\" x = self . _x_else_default_x ( x ) # TODO Train exited here, entered after sampling? self . posterior_estimator . eval () theta = ensure_theta_batched ( torch . as_tensor ( theta )) theta_repeated , x_repeated = match_theta_and_x_batch_shapes ( theta , x ) with torch . set_grad_enabled ( track_gradients ): # Evaluate on device, move back to cpu for comparison with prior. unnorm_log_prob = self . posterior_estimator . log_prob ( theta_repeated , context = x_repeated ) # Force probability to be zero outside prior support. in_prior_support = within_support ( self . prior , theta_repeated ) masked_log_prob = torch . where ( in_prior_support , unnorm_log_prob , torch . tensor ( float ( \"-inf\" ), dtype = torch . float32 , device = self . _device ), ) if leakage_correction_params is None : leakage_correction_params = dict () # use defaults log_factor = ( log ( self . leakage_correction ( x = x , ** leakage_correction_params )) if norm_posterior else 0 ) return masked_log_prob - log_factor map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'posterior' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'posterior' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. 
required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/direct_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"posterior\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. 
False Source code in sbi/inference/posteriors/direct_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = 10000 , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Source code in sbi/inference/posteriors/direct_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () x = self . _x_else_default_x ( x ) max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) samples = accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . prior , theta ), num_samples = num_samples , show_progress_bars = show_progress_bars , max_sampling_batch_size = max_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, alternative_method = \"build_posterior(..., sample_with='mcmc')\" , )[ 0 ] return samples set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only \\(\\theta\\) needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p(\\theta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/direct_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self
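For orientation, the following is a minimal sketch of how a DirectPosterior is typically obtained and used. It assumes the standard SNPE workflow from the flexible-interface tutorial and a toy Gaussian simulator; shapes and sample counts are placeholders.

```python
import torch
from sbi.inference import SNPE
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)  # toy simulator

theta = prior.sample((1000,))
x = simulator(theta)

inference = SNPE(prior=prior)
density_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(density_estimator)  # a DirectPosterior

x_o = torch.zeros(3)
posterior.set_default_x(x_o)            # chainable; see set_default_x above
samples = posterior.sample((500,))      # rejects samples outside the prior support
log_prob = posterior.log_prob(samples)  # leakage-corrected (norm_posterior=True)
```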
sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior ( NeuralPosterior ) \u00b6 Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows estimating the posterior log-probability by estimating the normalization constant with importance sampling. It also allows performing importance sampling (with .sample() ) and drawing approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() ). default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'sir' , oversampling_factor = 32 , max_sampling_batch_size = 10000 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during sampling, but only when calling .map() . None method str Either of [ sir | importance ]. This sets the behavior of the .sample() method. With sir , approximate posterior samples are generated with sampling importance resampling (SIR). With importance , the .sample() method returns a tuple of samples and corresponding importance weights. 'sir' oversampling_factor int Number of proposed samples from which only one is selected based on its importance weight. 32 max_sampling_batch_size int The batch size of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Device on which to sample, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. 
If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/importance_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"sir\" , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. method: Either of [`sir`|`importance`]. This sets the behavior of the `.sample()` method. With `sir`, approximate posterior samples are generated with sampling importance resampling (SIR). With `importance`, the `.sample()` method returns a tuple of samples and corresponding importance weights. oversampling_factor: Number of proposed samples from which only one is selected based on its importance weight. max_sampling_batch_size: The batch size of samples being drawn from the proposal at every iteration. device: Device on which to sample, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . _normalization_constant = None self . method = method self . oversampling_factor = oversampling_factor self . max_sampling_batch_size = max_sampling_batch_size self . _purpose = ( \"It provides sampling-importance resampling (SIR) to .sample() from the \" \"posterior and can evaluate the _unnormalized_ posterior density with \" \".log_prob().\" ) estimate_normalization_constant ( self , x , num_samples = 10000 , force_update = False ) \u00b6 Returns the normalization constant via importance sampling. Parameters: Name Type Description Default num_samples int Number of importance samples used for the estimate. 10000 force_update bool Whether to re-calculate the normlization constant when x is unchanged and have a cached value. False Source code in sbi/inference/posteriors/importance_posterior.py @torch . no_grad () def estimate_normalization_constant ( self , x : Tensor , num_samples : int = 10_000 , force_update : bool = False ) -> Tensor : \"\"\"Returns the normalization constant via importance sampling. Args: num_samples: Number of importance samples used for the estimate. force_update: Whether to re-calculate the normlization constant when x is unchanged and have a cached value. \"\"\" # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _normalization_constant is None if is_new_x : # Calculate at x; don't save. _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) return torch . mean ( torch . exp ( log_importance_weights )) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . 
default_x is not None _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) self . _normalization_constant = torch . mean ( torch . exp ( log_importance_weights )) return self . _normalization_constant . to ( self . _device ) # type: ignore log_prob ( self , theta , x = None , track_gradients = False , normalization_constant_params = None ) \u00b6 Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False normalization_constant_params Optional[dict] Parameters passed on to estimate_normalization_constant() . None Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/importance_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , normalization_constant_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. normalization_constant_params: Parameters passed on to `estimate_normalization_constant()`. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) self . potential_fn . set_x ( x ) theta = ensure_theta_batched ( torch . as_tensor ( theta )) with torch . set_grad_enabled ( track_gradients ): potential_values = self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) if normalization_constant_params is None : normalization_constant_params = dict () # use defaults normalization_constant = self . estimate_normalization_constant ( x , ** normalization_constant_params ) return ( potential_values - torch . log ( normalization_constant )) . to ( self . _device ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 
0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/importance_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. 
Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/importance_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , oversampling_factor = 32 , max_sampling_batch_size = 10000 , sample_with = None ) \u00b6 Return samples from the approximate posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from the posterior. torch.Size([]) x Optional[torch.Tensor] Observation to condition on; if None, the default x is used. None Source code in sbi/inference/posteriors/importance_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , ) -> Union [ Tensor , Tuple [ Tensor , Tensor ]]: \"\"\"Return samples from the approximate posterior distribution. Args: sample_shape: Desired shape of samples that are drawn from the posterior. x: Observation to condition on; if None, the default x is used. \"\"\" if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) if self . method == \"sir\" : return self . _sir_sample ( sample_shape , oversampling_factor = oversampling_factor , max_sampling_batch_size = max_sampling_batch_size , ) elif self . method == \"importance\" : return self . _importance_sample ( sample_shape ) else : raise NameError set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only \\(\\theta\\) needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p(\\theta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/importance_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self
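The posterior classes in this section can also be assembled by hand from a trained likelihood estimator via a potential function, as in the sampler-interface tutorial. The sketch below is illustrative only: the toy data and import paths are assumptions, and the same potential can be reused for the MCMCPosterior documented next.

```python
import torch
from sbi.inference import SNLE, ImportanceSamplingPosterior, MCMCPosterior
from sbi.inference import likelihood_estimator_based_potential
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)  # toy simulator

likelihood_estimator = SNLE(prior=prior).append_simulations(theta, x).train()

x_o = torch.zeros(2)
potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)

# Sampling-importance-resampling (SIR) posterior, as documented in this section.
posterior_sir = ImportanceSamplingPosterior(potential_fn, proposal=prior, method="sir")
samples = posterior_sir.sample((100,))

# The same potential works with the MCMCPosterior documented below.
posterior_mcmc = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,
    method="slice_np_vectorized",
)
```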
sbi.inference.posteriors.mcmc_posterior.MCMCPosterior ( NeuralPosterior ) \u00b6 Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows sampling from the posterior with MCMC. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. mcmc_method : str property writable \u00b6 Returns MCMC method. posterior_sampler property readonly \u00b6 Returns the sampler created by .sample() . __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'slice_np' , thin = 10 , warmup_steps = 10 , num_chains = 1 , init_strategy = 'resample' , init_strategy_parameters = {}, init_strategy_num_candidates = None , num_workers = 1 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any Proposal distribution that is used to initialize the MCMC chain. required theta_transform Optional[torch Transform] Transformation that will be applied during sampling. Allows performing MCMC in unconstrained space. None method str Method used for MCMC sampling, one of slice_np , slice_np_vectorized , slice , hmc , nuts . slice_np is a custom numpy implementation of slice sampling. slice_np_vectorized is identical to slice_np , but if num_chains>1 , the chains are vectorized for slice_np_vectorized whereas they are run sequentially for slice_np . The samplers hmc , nuts or slice sample with Pyro. 'slice_np' thin int The thinning factor for the chain. 10 warmup_steps int The initial number of samples to discard. 10 num_chains int The number of chains. 
1 init_strategy str The initialisation strategy for chains; proposal will draw init locations from proposal , whereas sir will use Sequential- Importance-Resampling (SIR). SIR initially samples init_strategy_num_candidates from the proposal , evaluates all of them under the potential_fn and proposal , and then resamples the initial locations with weights proportional to exp(potential_fn - proposal.log_prob . resample is the same as sir but uses exp(potential_fn) as weights. 'resample' init_strategy_parameters Dict[str, Any] Dictionary of keyword arguments passed to the init strategy, e.g., for init_strategy=sir this could be num_candidate_samples , i.e., the number of candidates to to find init locations (internal default is 1000 ), or device . {} init_strategy_num_candidates Optional[int] Number of candidates to to find init locations in init_strategy=sir (deprecated, use init_strategy_parameters instead). None num_workers int number of cpu cores used to parallelize mcmc 1 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/mcmc_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"slice_np\" , thin : int = 10 , warmup_steps : int = 10 , num_chains : int = 1 , init_strategy : str = \"resample\" , init_strategy_parameters : Dict [ str , Any ] = {}, init_strategy_num_candidates : Optional [ int ] = None , num_workers : int = 1 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: Proposal distribution that is used to initialize the MCMC chain. theta_transform: Transformation that will be applied during sampling. Allows to perform MCMC in unconstrained space. method: Method used for MCMC sampling, one of `slice_np`, `slice_np_vectorized`, `slice`, `hmc`, `nuts`. `slice_np` is a custom numpy implementation of slice sampling. `slice_np_vectorized` is identical to `slice_np`, but if `num_chains>1`, the chains are vectorized for `slice_np_vectorized` whereas they are run sequentially for `slice_np`. The samplers `hmc`, `nuts` or `slice` sample with Pyro. thin: The thinning factor for the chain. warmup_steps: The initial number of samples to discard. num_chains: The number of chains. init_strategy: The initialisation strategy for chains; `proposal` will draw init locations from `proposal`, whereas `sir` will use Sequential- Importance-Resampling (SIR). SIR initially samples `init_strategy_num_candidates` from the `proposal`, evaluates all of them under the `potential_fn` and `proposal`, and then resamples the initial locations with weights proportional to `exp(potential_fn - proposal.log_prob`. `resample` is the same as `sir` but uses `exp(potential_fn)` as weights. init_strategy_parameters: Dictionary of keyword arguments passed to the init strategy, e.g., for `init_strategy=sir` this could be `num_candidate_samples`, i.e., the number of candidates to to find init locations (internal default is `1000`), or `device`. init_strategy_num_candidates: Number of candidates to to find init locations in `init_strategy=sir` (deprecated, use init_strategy_parameters instead). 
num_workers: number of cpu cores used to parallelize mcmc device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . method = method self . thin = thin self . warmup_steps = warmup_steps self . num_chains = num_chains self . init_strategy = init_strategy self . init_strategy_parameters = init_strategy_parameters self . num_workers = num_workers self . _posterior_sampler = None # Hardcode parameter name to reduce clutter kwargs. self . param_name = \"theta\" if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates self . potential_ = self . _prepare_potential ( method ) self . _purpose = ( \"It provides MCMC to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) get_arviz_inference_data ( self ) \u00b6 Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to .sample(...) . For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: Type Description inference_data Arviz InferenceData object. Source code in sbi/inference/posteriors/mcmc_posterior.py def get_arviz_inference_data ( self ) -> InferenceData : \"\"\"Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to `.sample(...)`. For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: inference_data: Arviz InferenceData object. \"\"\" assert ( self . _posterior_sampler is not None ), \"\"\"No samples have been generated, call .sample() first.\"\"\" sampler : Union [ MCMC , SliceSamplerSerial , SliceSamplerVectorized ] = self . _posterior_sampler # If Pyro sampler and samples not transformed, use arviz' from_pyro. # Exclude 'slice' kernel as it lacks the 'divergence' diagnostics key. if isinstance ( self . _posterior_sampler , ( HMC , NUTS )) and isinstance ( self . theta_transform , torch_tf . IndependentTransform ): inference_data = az . from_pyro ( sampler ) # otherwise get samples from sampler and transform to original space. else : transformed_samples = sampler . get_samples ( group_by_chain = True ) # Pyro samplers returns dicts, get values. if isinstance ( transformed_samples , Dict ): # popitem gets last items, [1] get the values as tensor. transformed_samples = transformed_samples . popitem ()[ 1 ] # Our slice samplers return numpy arrays. elif isinstance ( transformed_samples , ndarray ): transformed_samples = torch . from_numpy ( transformed_samples ) . type ( torch . float32 ) # For MultipleIndependent priors transforms first dim must be batch dim. # thus, reshape back and forth to have batch dim in front. samples_shape = transformed_samples . shape samples = self . theta_transform . 
inv ( # type: ignore transformed_samples . reshape ( - 1 , samples_shape [ - 1 ]) ) . reshape ( # type: ignore * samples_shape ) inference_data = az . convert_to_inference_data ( { f \" { self . param_name } \" : samples } ) return inference_data log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/mcmc_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"\"\"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\"\"\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 
100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/mcmc_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. 
The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/mcmc_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , method = None , thin = None , warmup_steps = None , num_chains = None , init_strategy = None , init_strategy_parameters = None , init_strategy_num_candidates = None , mcmc_parameters = {}, mcmc_method = None , sample_with = None , num_workers = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) with MCMC. Check the __init__() method for a description of all arguments as well as their default values. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) mcmc_parameters Dict Dictionary that is passed only to support the API of sbi v0.17.2 or older. {} mcmc_method Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. Please use method instead. None sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/mcmc_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , method : Optional [ str ] = None , thin : Optional [ int ] = None , warmup_steps : Optional [ int ] = None , num_chains : Optional [ int ] = None , init_strategy : Optional [ str ] = None , init_strategy_parameters : Optional [ Dict [ str , Any ]] = None , init_strategy_num_candidates : Optional [ int ] = None , mcmc_parameters : Dict = {}, mcmc_method : Optional [ str ] = None , sample_with : Optional [ str ] = None , num_workers : Optional [ int ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$ with MCMC. Check the `__init__()` method for a description of all arguments as well as their default values. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. 
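A hedged sketch of drawing MCMC samples with the arguments documented above. It assumes `posterior` is an `MCMCPosterior` whose default `x` has been set; the particular settings are illustrative, not recommendations:

samples = posterior.sample(
    (1_000,),                      # sample_shape: draw 1_000 posterior samples
    method="slice_np_vectorized",  # vectorized numpy slice sampling
    num_chains=20,
    thin=5,
    warmup_steps=100,
    show_progress_bars=True,
)
# samples has shape (1_000, parameter_dim)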
mcmc_parameters: Dictionary that is passed only to support the API of `sbi` v0.17.2 or older. mcmc_method: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. Please use `method` instead. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) # Replace arguments that were not passed with their default. method = self . method if method is None else method thin = self . thin if thin is None else thin warmup_steps = self . warmup_steps if warmup_steps is None else warmup_steps num_chains = self . num_chains if num_chains is None else num_chains init_strategy = self . init_strategy if init_strategy is None else init_strategy num_workers = self . num_workers if num_workers is None else num_workers init_strategy_parameters = ( self . init_strategy_parameters if init_strategy_parameters is None else init_strategy_parameters ) if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) if mcmc_method is not None : warn ( \"You passed `mcmc_method` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Use `method` \" \"instead of `mcmc_method`.\" ) method = mcmc_method if mcmc_parameters : warn ( \"You passed `mcmc_parameters` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Instead, pass \" \"the variable to `.sample()` directly, e.g. \" \"`posterior.sample((1,), num_chains=5)`.\" ) # The following lines are only for backwards compatibility with sbi v0.17.2 or # older. m_p = mcmc_parameters # define to shorten the variable name method = _maybe_use_dict_entry ( method , \"mcmc_method\" , m_p ) thin = _maybe_use_dict_entry ( thin , \"thin\" , m_p ) warmup_steps = _maybe_use_dict_entry ( warmup_steps , \"warmup_steps\" , m_p ) num_chains = _maybe_use_dict_entry ( num_chains , \"num_chains\" , m_p ) init_strategy = _maybe_use_dict_entry ( init_strategy , \"init_strategy\" , m_p ) self . potential_ = self . _prepare_potential ( method ) # type: ignore initial_params = self . _get_initial_params ( init_strategy , # type: ignore num_chains , # type: ignore num_workers , show_progress_bars , ** init_strategy_parameters , ) num_samples = torch . Size ( sample_shape ) . numel () track_gradients = method in ( \"hmc\" , \"nuts\" ) with torch . set_grad_enabled ( track_gradients ): if method in ( \"slice_np\" , \"slice_np_vectorized\" ): transformed_samples = self . _slice_np_mcmc ( num_samples = num_samples , potential_function = self . 
potential_ , initial_params = initial_params , thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore vectorized = ( method == \"slice_np_vectorized\" ), num_workers = num_workers , show_progress_bars = show_progress_bars , ) elif method in ( \"hmc\" , \"nuts\" , \"slice\" ): transformed_samples = self . _pyro_mcmc ( num_samples = num_samples , potential_function = self . potential_ , initial_params = initial_params , mcmc_method = method , # type: ignore thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore num_chains = num_chains , show_progress_bars = show_progress_bars , ) else : raise NameError samples = self . theta_transform . inv ( transformed_samples ) return samples . reshape (( * sample_shape , - 1 )) # type: ignore set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_mcmc_method ( self , method ) \u00b6 Sets sampling method to for MCMC and returns NeuralPosterior . Parameters: Name Type Description Default method str Method to use. required Returns: Type Description NeuralPosterior NeuralPosterior for chainable calls. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_mcmc_method ( self , method : str ) -> \"NeuralPosterior\" : \"\"\"Sets sampling method to for MCMC and returns `NeuralPosterior`. Args: method: Method to use. Returns: `NeuralPosterior` for chainable calls. \"\"\" self . 
_mcmc_method = method return self sbi.inference.posteriors.rejection_posterior.RejectionPosterior ( NeuralPosterior ) \u00b6 Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , max_sampling_batch_size = 10000 , num_samples_to_find_max = 10000 , num_iter_to_find_max = 100 , m = 1.2 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during but only when calling .map() . None max_sampling_batch_size int The batchsize of samples being drawn from the proposal at every iteration. 10000 num_samples_to_find_max int The number of samples that are used to find the maximum of the potential_fn / proposal ratio. 10000 num_iter_to_find_max int The number of gradient ascent iterations to find the maximum of the potential_fn / proposal ratio. 100 m float Multiplier to the potential_fn / proposal ratio. 1.2 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/rejection_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , max_sampling_batch_size : int = 10_000 , num_samples_to_find_max : int = 10_000 , num_iter_to_find_max : int = 100 , m : float = 1.2 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. max_sampling_batch_size: The batchsize of samples being drawn from the proposal at every iteration. num_samples_to_find_max: The number of samples that are used to find the maximum of the `potential_fn / proposal` ratio. num_iter_to_find_max: The number of gradient ascent iterations to find the maximum of the `potential_fn / proposal` ratio. m: Multiplier to the `potential_fn / proposal` ratio. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . max_sampling_batch_size = max_sampling_batch_size self . num_samples_to_find_max = num_samples_to_find_max self . num_iter_to_find_max = num_iter_to_find_max self . m = m self . 
_purpose = ( \"It provides rejection sampling to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/rejection_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. 
Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/rejection_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . 
required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/rejection_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = None , num_samples_to_find_max = None , num_iter_to_find_max = None , m = None , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior \\(p(\\theta|x)\\) via rejection sampling. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Samples from posterior. Source code in sbi/inference/posteriors/rejection_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : Optional [ int ] = None , num_samples_to_find_max : Optional [ int ] = None , num_iter_to_find_max : Optional [ int ] = None , m : Optional [ float ] = None , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ): r \"\"\"Return samples from posterior $p(\\theta|x)$ via rejection sampling. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () self . potential_fn . set_x ( self . _x_else_default_x ( x )) potential = partial ( self . potential_fn , track_gradients = True ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) # Replace arguments that were not passed with their default. max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) num_samples_to_find_max = ( self . num_samples_to_find_max if num_samples_to_find_max is None else num_samples_to_find_max ) num_iter_to_find_max = ( self . 
num_iter_to_find_max if num_iter_to_find_max is None else num_iter_to_find_max ) m = self . m if m is None else m samples , _ = rejection_sample ( potential , proposal = self . proposal , num_samples = num_samples , show_progress_bars = show_progress_bars , warn_acceptance = 0.01 , max_sampling_batch_size = max_sampling_batch_size , num_samples_to_find_max = num_samples_to_find_max , num_iter_to_find_max = num_iter_to_find_max , m = m , device = self . _device , ) return samples . reshape (( * sample_shape , - 1 )) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/rejection_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.vi_posterior.VIPosterior ( NeuralPosterior ) \u00b6 Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522 default_x : Optional [ torch . 
Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. q : Distribution property writable \u00b6 Returns the variational posterior. vi_method : str property writable \u00b6 Variational inference method e.g. one of [rKL, fKL, IW, alpha]. __init__ ( self , potential_fn , prior = None , q = 'maf' , theta_transform = None , vi_method = 'rKL' , device = 'cpu' , x_shape = None , parameters = [], modules = []) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required prior Optional[torch Distribution] This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If None is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. None q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, TransformedDistribution , or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro TransformedDistribution . Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. num_transfroms within the get_flow_builder method specifying the number of transformations within a normalizing flow. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). 'maf' theta_transform Optional[torch Transform] Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. None vi_method str This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are mode seeking i.e. they underestimate variance and collapse on multimodal targets ( rKL , alpha for alpha > 1) and some are mass covering i.e. they overestimate variance but typically cover all modes ( fKL , IW , alpha for alpha < 1). 'rKL' device str Training device, e.g., cpu , cuda or cuda:0 . We will ensure that all other objects are also on this device. 'cpu' x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None parameters Iterable List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a parameters attribute. [] modules Iterable List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a modules attribute. [] Source code in sbi/inference/posteriors/vi_posterior.py def __init__ ( self , potential_fn : Callable , prior : Optional [ TorchDistribution ] = None , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ] = \"maf\" , theta_transform : Optional [ TorchTransform ] = None , vi_method : str = \"rKL\" , device : str = \"cpu\" , x_shape : Optional [ torch . 
Size ] = None , parameters : Iterable = [], modules : Iterable = [], ): \"\"\" Args: potential_fn: The potential function from which to draw samples. prior: This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If `None` is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. q: Variational distribution, either string, `TransformedDistribution`, or a `VIPosterior` object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro `TransformedDistribution`. Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms` within the `get_flow_builder` method specifying the number of transformations within a normalizing flow. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). theta_transform: Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. vi_method: This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are `mode seeking` i.e. they underestimate variance and collapse on multimodal targets (`rKL`, `alpha` for alpha > 1) and some are `mass covering` i.e. they overestimate variance but typically cover all modes (`fKL`, `IW`, `alpha` for alpha < 1). device: Training device, e.g., `cpu`, `cuda` or `cuda:0`. We will ensure that all other objects are also on this device. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. parameters: List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a `parameters` attribute. modules: List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a `modules` attribute. \"\"\" super () . __init__ ( potential_fn , theta_transform , device , x_shape = x_shape ) # Especially the prior may be on another device -> move it... self . _device = device self . potential_fn . device = device move_all_tensor_to_device ( self . potential_fn , device ) # Get prior and previous builds if prior is not None : self . _prior = prior elif hasattr ( self . potential_fn , \"prior\" ) and isinstance ( self . potential_fn . prior , Distribution ): self . _prior = self . potential_fn . prior elif isinstance ( q , VIPosterior ) and isinstance ( q . _prior , Distribution ): self . _prior = q . _prior else : raise ValueError ( \"We could not find a suitable prior distribution within `potential_fn`\" \"or `q` (if a VIPosterior is given). Please explicitly specify a prior.\" ) move_all_tensor_to_device ( self . _prior , device ) self . _optimizer = None # In contrast to MCMC we want to project into constrained space. if theta_transform is None : self . link_transform = mcmc_transform ( self . _prior ) . inv else : self . link_transform = theta_transform . 
inv # This will set the variational distribution and VI method self . set_q ( q , parameters = parameters , modules = modules ) self . set_vi_method ( vi_method ) self . _purpose = ( \"It provides Variational inference to .sample() from the posterior and \" \"can evaluate the _normalized_ posterior density with .log_prob().\" ) evaluate ( self , quality_control_metric = 'psis' , N = 50000 ) \u00b6 This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type psis , which checks the quality based on the tails of importance weights (there should not be much with a large one), or prop which checks the proportionality between q and potential_fn. NOTE: In our experience prop is sensitive to distinguish good from ok whereas psis is more sensitive in distinguishing very bad from ok . Parameters: Name Type Description Default quality_control_metric str The metric of choice, we currently support [psis, prop, prop_prior]. 'psis' N int Number of samples which is used to evaluate the metric. 50000 Source code in sbi/inference/posteriors/vi_posterior.py def evaluate ( self , quality_control_metric : str = \"psis\" , N : int = int ( 5e4 )) -> None : \"\"\"This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type `psis`, which checks the quality based on the tails of importance weights (there should not be much with a large one), or `prop` which checks the proportionality between q and potential_fn. NOTE: In our experience `prop` is sensitive to distinguish ``good`` from ``ok`` whereas `psis` is more sensitive in distinguishing `very bad` from `ok`. Args: quality_control_metric: The metric of choice, we currently support [psis, prop, prop_prior]. N: Number of samples which is used to evaluate the metric. \"\"\" quality_control_fn , quality_control_msg = get_quality_metric ( quality_control_metric ) metric = round ( float ( quality_control_fn ( self , N = N )), 3 ) print ( f \"Quality Score: { metric } \" + quality_control_msg ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the variational posterior. Parameters: Name Type Description Default theta Tensor Parameters required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/vi_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the variational posterior. Args: theta: Parameters track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit using observation { x } . \\ Please train.\" ) with torch . set_grad_enabled ( track_gradients ): theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . q . 
log_prob ( theta ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 10000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 10000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/vi_posterior.py def map ( self , x : Optional [ TorchTensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , TorchTensor ] = \"proposal\" , num_init_samples : int = 10_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. 
For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" self . proposal = self . q return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/vi_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , ** kwargs ) \u00b6 Samples from the variational posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Shape of samples torch.Size([]) Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/vi_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , ** kwargs , ) -> Tensor : \"\"\"Samples from the variational posterior distribution. 
Args: sample_shape: Shape of samples Returns: Samples from posterior. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit on the specified `default_x` \" f \" { x } . Please train using `posterior.train()`.\" ) samples = self . q . sample ( torch . Size ( sample_shape )) return samples . reshape (( * sample_shape , samples . shape [ - 1 ])) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/vi_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_q ( self , q , parameters = [], modules = []) \u00b6 Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function parameters or modules to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Parameters: Name Type Description Default q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a parameterized distribution object i.e. 
a torch.distributions Distribution with methods parameters returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. num_transfroms:int by using the get_flow_builder method specifying the hyperparameters. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). required parameters Iterable List of parameters associated with the distribution object. [] modules Iterable List of modules associated with the distribution object. [] Source code in sbi/inference/posteriors/vi_posterior.py def set_q ( self , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ], parameters : Iterable = [], modules : Iterable = [], ) -> None : \"\"\"Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function `parameters` or `modules` to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Args: q: Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a `parameterized` distribution object i.e. a torch.distributions Distribution with methods `parameters` returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms:int` by using the `get_flow_builder` method specifying the hyperparameters. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). parameters: List of parameters associated with the distribution object. modules: List of modules associated with the distribution object. \"\"\" self . _q_arg = q if isinstance ( q , Distribution ): q = adapt_variational_distribution ( q , self . _prior , self . link_transform , parameters = parameters , modules = modules , ) make_object_deepcopy_compatible ( q ) self_custom_q_init_cache = deepcopy ( q ) self . _q_build_fn = lambda * args , ** kwargs : self_custom_q_init_cache self . _trained_on = None elif isinstance ( q , str ) or isinstance ( q , Callable ): if isinstance ( q , str ): self . _q_build_fn = get_flow_builder ( q ) else : self . _q_build_fn = q q = self . _q_build_fn ( self . _prior . event_shape , self . link_transform , device = self . _device , ) make_object_deepcopy_compatible ( q ) self . _trained_on = None elif isinstance ( q , VIPosterior ): self . _q_build_fn = q . _q_build_fn self . _trained_on = q . _trained_on self . vi_method = q . vi_method # type: ignore self . _device = q . _device self . _prior = q . _prior self . _x = q . _x self . _q_arg = q . _q_arg make_object_deepcopy_compatible ( q . q ) q = deepcopy ( q . q ) move_all_tensor_to_device ( q , self . 
_device ) assert isinstance ( q , Distribution ), \"\"\"Something went wrong when initializing the variational distribution. Please create an issue on github https://github.com/mackelab/sbi/issues\"\"\" check_variational_distribution ( q , self . _prior ) self . _q = q set_vi_method ( self , method ) \u00b6 Sets variational inference method. Parameters: Name Type Description Default method str One of [rKL, fKL, IW, alpha]. required Returns: Type Description VIPosterior VIPosterior for chainable calls. Source code in sbi/inference/posteriors/vi_posterior.py def set_vi_method ( self , method : str ) -> \"VIPosterior\" : \"\"\"Sets variational inference method. Args: method: One of [rKL, fKL, IW, alpha]. Returns: `VIPosterior` for chainable calls. \"\"\" self . _vi_method = method self . _optimizer_builder = get_VI_method ( method ) return self train ( self , x = None , n_particles = 256 , learning_rate = 0.001 , gamma = 0.999 , max_num_iters = 2000 , min_num_iters = 10 , clip_value = 10.0 , warm_up_rounds = 100 , retrain_from_scratch = False , reset_optimizer = False , show_progress_bar = True , check_for_convergence = True , quality_control = True , quality_control_metric = 'psis' , ** kwargs ) \u00b6 This method trains the variational posterior. Parameters: Name Type Description Default x Optional[Tensor] The observation. None n_particles int Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. 256 learning_rate float Learning rate of the optimizer. 0.001 gamma float Learning rate decay per iteration. We use an exponential decay scheduler. 0.999 max_num_iters int Maximum number of iterations. 2000 min_num_iters int Minimum number of iterations. 10 clip_value float Gradient clipping value, decreasing may help if you see invalid values. 10.0 warm_up_rounds int Initialize the posterior as the prior. 100 retrain_from_scratch bool Retrain the variational distributions from scratch. False reset_optimizer bool Reset the divergence optimizer False show_progress_bar bool If any progress report should be displayed. True quality_control bool If False quality control is skipped. True quality_control_metric str Which metric to use for evaluating the quality. 'psis' kwargs Hyperparameters check corresponding DivergenceOptimizer for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some exotic user-specified q\u2019s. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See DivergenceOptimizer for details. scheduler: A PyTorch learning rate scheduler. See DivergenceOptimizer for details. alpha: Only used if vi_method= alpha . Determines the alpha divergence. K: Only used if vi_method= IW . Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) {} Returns: Type Description VIPosterior VIPosterior (can be used to chain calls). 
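An end-to-end sketch of the `VIPosterior` workflow documented above: construction, `.train()`, `.evaluate()`, and sampling. All names (`potential_fn`, `prior`, `x_o`) are assumptions from an earlier SNLE/SNRE step, and the hyperparameters are illustrative only:

from sbi.inference import VIPosterior  # assumed top-level re-export

posterior = VIPosterior(potential_fn, prior=prior, q="nsf", vi_method="fKL")
posterior.set_default_x(x_o)

# Fit the variational distribution q(theta) to the potential.
posterior.train(
    learning_rate=1e-3,
    max_num_iters=1_000,
    quality_control=False,   # run the quality check explicitly below
)
posterior.evaluate(quality_control_metric="psis")

# Sampling and (normalized) log-probabilities are now cheap.
samples = posterior.sample((1_000,))
log_probs = posterior.log_prob(samples)

Because `.set_default_x()` and `.train()` return the posterior object, these calls can also be chained.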
Source code in sbi/inference/posteriors/vi_posterior.py def train ( self , x : Optional [ TorchTensor ] = None , n_particles : int = 256 , learning_rate : float = 1e-3 , gamma : float = 0.999 , max_num_iters : int = 2000 , min_num_iters : int = 10 , clip_value : float = 10.0 , warm_up_rounds : int = 100 , retrain_from_scratch : bool = False , reset_optimizer : bool = False , show_progress_bar : bool = True , check_for_convergence : bool = True , quality_control : bool = True , quality_control_metric : str = \"psis\" , ** kwargs , ) -> \"VIPosterior\" : \"\"\"This method trains the variational posterior. Args: x: The observation. n_particles: Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. learning_rate: Learning rate of the optimizer. gamma: Learning rate decay per iteration. We use an exponential decay scheduler. max_num_iters: Maximum number of iterations. min_num_iters: Minimum number of iterations. clip_value: Gradient clipping value, decreasing may help if you see invalid values. warm_up_rounds: Initialize the posterior as the prior. retrain_from_scratch: Retrain the variational distributions from scratch. reset_optimizer: Reset the divergence optimizer show_progress_bar: If any progress report should be displayed. quality_control: If False quality control is skipped. quality_control_metric: Which metric to use for evaluating the quality. kwargs: Hyperparameters check corresponding `DivergenceOptimizer` for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some `exotic` user-specified q's. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See `DivergenceOptimizer` for details. scheduler: A PyTorch learning rate scheduler. See `DivergenceOptimizer` for details. alpha: Only used if vi_method=`alpha`. Determines the alpha divergence. K: Only used if vi_method=`IW`. Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) Returns: VIPosterior: `VIPosterior` (can be used to chain calls). \"\"\" # Update optimizer with current arguments. if self . _optimizer is not None : self . _optimizer . update ({ ** locals (), ** kwargs }) # Init q and the optimizer if necessary if retrain_from_scratch : self . q = self . _q_build_fn () # type: ignore self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) if ( reset_optimizer or self . _optimizer is None or not isinstance ( self . _optimizer , self . _optimizer_builder ) ): self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) # Check context x = atleast_2d_float32_tensor ( self . _x_else_default_x ( x )) . to ( # type: ignore self . _device ) already_trained = self . _trained_on is not None and ( x == self . _trained_on ) . all () # Optimize optimizer = self . _optimizer optimizer . to ( self . _device ) optimizer . 
reset_loss_stats () if show_progress_bar : iters = tqdm ( range ( max_num_iters )) else : iters = range ( max_num_iters ) # Warmup before training if reset_optimizer or ( not optimizer . warm_up_was_done and not already_trained ): if show_progress_bar : iters . set_description ( # type: ignore \"Warmup phase, this may take a few seconds...\" ) optimizer . warm_up ( warm_up_rounds ) for i in iters : optimizer . step ( x ) mean_loss , std_loss = optimizer . get_loss_stats () # Update progress bar if show_progress_bar : assert isinstance ( iters , tqdm ) iters . set_description ( # type: ignore f \"Loss: { np . round ( float ( mean_loss ), 2 ) } \" f \"Std: { np . round ( float ( std_loss ), 2 ) } \" ) # Check for convergence if check_for_convergence and i > min_num_iters : if optimizer . converged (): if show_progress_bar : print ( f \" \\n Converged with loss: { np . round ( float ( mean_loss ), 2 ) } \" ) break # Training finished: self . _trained_on = x # Evaluate quality if quality_control : try : self . evaluate ( quality_control_metric = quality_control_metric ) except Exception as e : print ( f \"Quality control did not work, we reset the variational \\ posterior,please check your setting. \\ \\n Following error occured { e } \" ) self . train ( learning_rate = learning_rate * 0.1 , retrain_from_scratch = True , reset_optimizer = True , ) return self Models \u00b6 sbi . utils . get_nn_models . posterior_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. 
{} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. 
\" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn sbi . utils . get_nn_models . likelihood_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. 
num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn sbi . utils . get_nn_models . classifier_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , embedding_net_theta = Identity (), embedding_net_x = Identity (), ** kwargs ) \u00b6 Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. 
The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn Potentials \u00b6 sbi . inference . potentials . posterior_based_potential . posterior_estimator_based_potential ( posterior_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. 
x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . likelihood_based_potential . likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . ratio_based_potential . ratio_estimator_based_potential ( ratio_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . 
True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform Analysis \u00b6 sbi . analysis . plot . pairplot ( samples , points = None , limits = None , subset = None , offdiag = 'hist' , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], upper = None , fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . 
Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . 
T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . marginal_plot ( samples , points = None , limits = None , subset = None , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. 
{} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . conditional_pairplot ( density , condition , limits , points = None , subset = None , resolution = 50 , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. 
be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . conditional_density . conditional_corrcoeff ( density , limits , condition , subset = None , resolution = 50 ) \u00b6 Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. 
If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"API Reference"},{"location":"reference/#api-reference","text":"","title":"API Reference"},{"location":"reference/#inference","text":"","title":"Inference"},{"location":"reference/#sbi.inference.base.infer","text":"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior \\(p(\\theta|x)\\) can be sampled and evaluated for any \\(x\\) (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. 
required method str What inference method to use. Either of SNPE, SNLE or SNRE. required num_simulations int Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. required num_workers int Number of parallel workers to use for simulations. 1 Returns: Posterior over parameters conditional on observations (amortized). Source code in sbi/inference/base.py def infer ( simulator : Callable , prior : Distribution , method : str , num_simulations : int , num_workers : int = 1 , ) -> NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior","title":"infer()"},{"location":"reference/#sbi.utils.user_input_checks.prepare_for_sbi","text":"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. 
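As a concrete illustration of this wrapper, the following hypothetical sketch runs `prepare_for_sbi` on a toy simulator and prior and then follows the same steps as `infer` above via the flexible interface. The toy simulator, the `BoxUniform` prior, and the simulation budget are assumptions made only for this example.

```python
# Hypothetical sketch: check a toy simulator/prior pair, then run single-round SNPE.
import torch
from sbi.inference import SNPE, simulate_for_sbi
from sbi.utils import BoxUniform
from sbi.utils.user_input_checks import prepare_for_sbi

def simulator(theta):
    # Toy simulator: parameters plus Gaussian observation noise.
    return theta + 0.1 * torch.randn_like(theta)

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Reshape and type-cast simulator and prior so they meet sbi's requirements.
simulator, prior = prepare_for_sbi(simulator, prior)

theta, x = simulate_for_sbi(simulator, proposal=prior, num_simulations=500)
inference = SNPE(prior=prior)
_ = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()
```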
Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches, and its samples and log-prob evaluations are Tensors.
    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior","title":"prepare_for_sbi()"},{"location":"reference/#sbi.inference.base.simulate_for_sbi","text":"Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . 
sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x","title":"simulate_for_sbi()"},{"location":"reference/#sbi.inference.snpe.snpe_a.SNPE_A","text":"","title":"SNPE_A"},{"location":"reference/#sbi.inference.snpe.snpe_c.SNPE_C","text":"","title":"SNPE_C"},{"location":"reference/#sbi.inference.snle.snle_a.SNLE_A","text":"","title":"SNLE_A"},{"location":"reference/#sbi.inference.snre.snre_a.SNRE_A","text":"","title":"SNRE_A"},{"location":"reference/#sbi.inference.snre.snre_b.SNRE_B","text":"","title":"SNRE_B"},{"location":"reference/#sbi.inference.snre.snre_c.SNRE_C","text":"","title":"SNRE_C"},{"location":"reference/#sbi.inference.snre.bnre.BNRE","text":"","title":"BNRE"},{"location":"reference/#sbi.inference.abc.mcabc.MCABC","text":"","title":"MCABC"},{"location":"reference/#sbi.inference.abc.smcabc.SMCABC","text":"","title":"SMCABC"},{"location":"reference/#posteriors","text":"","title":"Posteriors"},{"location":"reference/#sbi.inference.posteriors.direct_posterior.DirectPosterior","text":"Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE.","title":"DirectPosterior"},{"location":"reference/#sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior","text":"Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows to estimate the posterior log-probability by estimating the normlalization constant with importance sampling. It also allows to perform importance sampling (with .sample() ) and to draw approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() )","title":"ImportanceSamplingPosterior"},{"location":"reference/#sbi.inference.posteriors.mcmc_posterior.MCMCPosterior","text":"Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows to sample from the posterior with MCMC.","title":"MCMCPosterior"},{"location":"reference/#sbi.inference.posteriors.rejection_posterior.RejectionPosterior","text":"Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling.","title":"RejectionPosterior"},{"location":"reference/#sbi.inference.posteriors.vi_posterior.VIPosterior","text":"Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . 
After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522","title":"VIPosterior"},{"location":"reference/#models","text":"","title":"Models"},{"location":"reference/#sbi.utils.get_nn_models.posterior_nn","text":"Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. \" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn","title":"posterior_nn()"},{"location":"reference/#sbi.utils.get_nn_models.likelihood_nn","text":"Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
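A minimal hedged usage sketch for posterior_nn (assuming prior, theta, and x already exist): the returned build function is handed to SNPE, which calls it internally once the data shapes are known.

from sbi.inference import SNPE
from sbi.utils.get_nn_models import posterior_nn

# Build function for a neural spline flow with a custom width and depth.
density_estimator_build_fn = posterior_nn(
    model="nsf", hidden_features=60, num_transforms=3
)
inference = SNPE(prior=prior, density_estimator=density_estimator_build_fn)
posterior_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(posterior_estimator)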
'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. 
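likelihood_nn is used the same way, only together with SNLE (again a hedged sketch with prior, theta, and x assumed to exist):

from sbi.inference import SNLE
from sbi.utils.get_nn_models import likelihood_nn

# Masked autoregressive flow as likelihood estimator; an embedding net for theta could be passed as well.
inference = SNLE(
    prior=prior, density_estimator=likelihood_nn(model="maf", hidden_features=50)
)
likelihood_estimator = inference.append_simulations(theta, x).train()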
\"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn","title":"likelihood_nn()"},{"location":"reference/#sbi.utils.get_nn_models.classifier_nn","text":"Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. 
- `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn","title":"classifier_nn()"},{"location":"reference/#potentials","text":"","title":"Potentials"},{"location":"reference/#sbi.inference.potentials.posterior_based_potential.posterior_estimator_based_potential","text":"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . 
device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"posterior_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.likelihood_based_potential.likelihood_estimator_based_potential","text":"Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"likelihood_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.ratio_based_potential.ratio_estimator_based_potential","text":"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . 
Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"ratio_estimator_based_potential()"},{"location":"reference/#analysis","text":"","title":"Analysis"},{"location":"reference/#sbi.analysis.plot.pairplot","text":"Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . 
Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . 
ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"pairplot()"},{"location":"reference/#sbi.analysis.plot.marginal_plot","text":"Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. 
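For orientation, a minimal hedged pairplot example on synthetic samples (purely illustrative, not part of the reference itself):

import torch
from sbi.analysis import pairplot

# 1000 samples from a 3D standard normal, shown as 1D marginals plus pairwise 2D marginals.
samples = torch.randn(1000, 3)
fig, axes = pairplot(
    samples,
    limits=[[-3, 3]] * 3,
    figsize=(5, 5),
    labels=[r"$\theta_1$", r"$\theta_2$", r"$\theta_3$"],
)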
Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes )","title":"marginal_plot()"},{"location":"reference/#sbi.analysis.plot.conditional_pairplot","text":"Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. 
None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . 
T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"conditional_pairplot()"},{"location":"reference/#sbi.analysis.conditional_density.conditional_corrcoeff","text":"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"conditional_corrcoeff()"},{"location":"examples/00_HH_simulator/","text":"Inference on Hodgkin-Huxley model: tutorial \u00b6 In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False Different required components \u00b6 Before running inference, let us define the different required components: observed data prior over model parameters simulator 1. Observed data \u00b6 Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial. 2. Simulator \u00b6 We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 
2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . 
FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to. 3. Prior over model parameters \u00b6 Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) ) Inference \u00b6 Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue . Coming back to the observed data \u00b6 As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . 
gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) Analysis of the posterior given the observed data \u00b6 After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case. References \u00b6 A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. 
Biological Cybernetics, 99(4-5), 2008.","title":"Hodgkin-Huxley example"},{"location":"examples/00_HH_simulator/#inference-on-hodgkin-huxley-model-tutorial","text":"In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False","title":"Inference on Hodgkin-Huxley model: tutorial"},{"location":"examples/00_HH_simulator/#different-required-components","text":"Before running inference, let us define the different required components: observed data prior over model parameters simulator","title":"Different required components"},{"location":"examples/00_HH_simulator/#1-observed-data","text":"Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial.","title":"1. Observed data"},{"location":"examples/00_HH_simulator/#2-simulator","text":"We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . 
Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. 
Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to.","title":"2. Simulator"},{"location":"examples/00_HH_simulator/#3-prior-over-model-parameters","text":"Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"3. Prior over model parameters"},{"location":"examples/00_HH_simulator/#inference","text":"Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue .","title":"Inference"},{"location":"examples/00_HH_simulator/#coming-back-to-the-observed-data","text":"As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" ))","title":"Coming back to the observed data"},{"location":"examples/00_HH_simulator/#analysis-of-the-posterior-given-the-observed-data","text":"After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. 
To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case.","title":"Analysis of the posterior given the observed data"},{"location":"examples/00_HH_simulator/#references","text":"A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. Biological Cybernetics, 99(4-5), 2008.","title":"References"},{"location":"examples/01_decision_making_model/","text":"SBI for decision-making models \u00b6 In a previous tutorial , we showed how to use SBI with trial-based iid data. Such scenarios can arise, for example, in models of perceptual decision making. In addition to trial-based iid data points, these models often come with mixed data types and varying experimental conditions. Here, we show how sbi can be used to perform inference in such models with the MNLE method. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. 
However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . MNLE was recently added to sbi (see this PR and also issue ) and follows the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. 
class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 ) Alternative: parallelize yourself \u00b6 You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface : Some more background \u00b6 sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. 
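To make this concrete, here is a minimal round-trip sketch (with a toy stand-in simulator, purely an illustration) of the kind of serialization joblib performs when shipping your simulator to worker processes; a simulator that survives this round trip will usually also work with num_workers > 1.
import cloudpickle
import torch

def toy_simulator(theta):
    # Stand-in for a user simulator; returns noisy parameters.
    return theta + 0.1 * torch.randn_like(theta)

# Serialize and restore the simulator, as joblib workers effectively do.
payload = cloudpickle.dumps(toy_simulator)
restored_simulator = cloudpickle.loads(payload)
print(restored_simulator(torch.zeros(3)).shape)  # torch.Size([3])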
Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#when-using-multiple-workers-i-get-a-pickling-error-can-i-still-use-multiprocessing","text":"Yes, but you will have to make a few adjustments to your code. Some background: when using num_workers > 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 )","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#alternative-parallelize-yourself","text":"You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface :","title":"Alternative: parallelize yourself"},{"location":"faq/question_03/#some-more-background","text":"sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"Some more background"},{"location":"faq/question_04/","text":"Can I use the GPU for training the density estimator? \u00b6 TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) . Performance \u00b6 Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. 
A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#can-i-use-the-gpu-for-training-the-density-estimator","text":"TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#performance","text":"Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Performance"},{"location":"faq/question_05/","text":"How should I save and load objects in sbi ? \u00b6 NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. 
.train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_05/#how-should-i-save-and-load-objects-in-sbi","text":"NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. .train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_06/","text":"Can I stop neural network training and resume it later? \u00b6 Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_06/#can-i-stop-neural-network-training-and-resume-it-later","text":"Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . 
dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_07/","text":"Can I use a custom prior with sbi? \u00b6 sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. 
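As a quick sanity check (an illustrative addition, assuming torch and the wrapped prior returned by process_prior above), you can verify that the wrapped object behaves like a batched torch Distribution before using it for inference:
import torch

# The wrapped prior should return batched samples and one log-prob per sample.
samples = prior.sample((100,))
log_probs = prior.log_prob(samples)
assert samples.shape == torch.Size([100, 2])
assert log_probs.shape == torch.Size([100])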
If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"faq/question_07/#can-i-use-a-custom-prior-with-sbi","text":"sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). 
In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"tutorial/00_getting_started/","text":"Getting started with sbi \u00b6 Note, you can find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/tutorials/00_getting_started.ipynb in the sbi repository. import torch from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer Running the inference procedure \u00b6 sbi provides a simple interface to run state-of-the-art algorithms for simulation-based inference. For inference, you need to provide two ingredients: 1) a prior distribution that allows to sample parameter sets. 2) a simulator that takes parameter sets and produces simulation outputs. For example, we can have a 3-dimensional parameter space with a uniform prior between [-1,1] and a simple simulator that for the sake of example adds 1.0 and some Gaussian noise to the parameter set: num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def simulator ( parameter_set ): return 1.0 + parameter_set + torch . randn ( parameter_set . shape ) * 0.1 sbi can then run inference: # Other methods are \"SNLE\" or \"SNRE\". posterior = infer ( simulator , prior , method = \"SNPE\" , num_simulations = 1000 ) Running 1000 simulations.: 0%| | 0/1000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The simulator model \u00b6 The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)') Defining an embedding_net \u00b6 An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . __init__ () # 2D convolutional layer self . conv1 = nn . 
Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet () The inference procedure \u00b6 With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00","title":"Learning summary statistics with a neural net"},{"location":"tutorial/05_embedding_net/#the-simulator-model","text":"The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)')","title":"The simulator model"},{"location":"tutorial/05_embedding_net/#defining-an-embedding_net","text":"An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . 
__init__ () # 2D convolutional layer self . conv1 = nn . Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet ()","title":"Defining an embedding_net"},{"location":"tutorial/05_embedding_net/#the-inference-procedure","text":"With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00] 1.3 Summary statistics \u00b6 We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) i.e. Euclidean distance from the observation function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change in small amounts every time you rerun it due to the noise, except if you set the seed. 1.4 Simulating data \u00b6 Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . 
plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there might not be an apparent reason why one method would be better than another. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones. 1.5 Inference \u00b6 1.5.1 Using the MSE \u00b6 Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve built the posterior, we can see how plausible it considers certain parameters to be, given that we tell it we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi to do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better. 1.5.2 Using 3 coordinates as summary statistics \u00b6 x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself with how this improves further with more training samples, or try to find out exactly how many you\u2019d need to still obtain a satisfying-looking posterior and posterior-sample simulations that stay close to the observation. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.
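Before moving on, a simple posterior-predictive check makes this comparison concrete: simulate traces for a handful of posterior samples and overlay them on the observation. The sketch below is an illustrative addition that assumes the create_t_x helper, theta_o and the posterior samples theta_p from the previous cell.
import matplotlib.pyplot as plt

# Overlay simulations from 50 posterior samples on the observed trace.
t, x_truth = create_t_x(theta_o)
t, x_pred = create_t_x(theta_p[:50].numpy())

plt.plot(t, x_pred, \"grey\", zorder=0)
plt.plot(t, x_truth, \"k\", zorder=1, label=\"observation\")
plt.legend()
plt.show()
With the three-coordinate summary statistics, the grey traces should hug the black observation much more tightly than they did for the MSE-based posterior.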
1.6 Prior simulations\u2019 summary statistics vs observed summary statistics \u00b6 Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance. 1.7 Explicit recommendations \u00b6 We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#crafting-summary-statistics","text":"Many simulators produce outputs that are high-dimesional. For example, a simulator might generate a time series or an image. In a previous tutorial , we discussed how a neural networks can be used to learn summary statistics from such data. In this notebook, we will instead focus on hand-crafting summary statistics. We demonstrate that the choice of summary statistics can be crucial for the performance of the inference algorithm. import numpy as np import torch import matplotlib.pyplot as plt import matplotlib as mpl # sbi import sbi.utils as utils from sbi.inference.base import infer from sbi.inference import SNPE , prepare_for_sbi , simulate_for_sbi from sbi.utils.get_nn_models import posterior_nn from sbi.analysis import pairplot # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False This notebook is not intended to provide a one-fits-all approach. 
In fact it argues against this: it argues for the user to carefully construct their summary statistics to (i) further help the user understand his observed data, (ii) help them understand exactly what they want the model to recover from the observation and (iii) help the inference framework itself.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#example-1-the-quadratic-function","text":"Assume we have a simulator that is given by a quadratic function: \\(x(t) = a\\cdot t^2 + b\\cdot t + c + \\epsilon\\) , where \\(\\epsilon\\) is Gaussian observation noise and \\(\\theta = \\{a, b, c\\}\\) are the parameters. Given an observed quadratic function \\(x_o\\) , we would like to recover the posterior over parameters \\(a_o\\) , \\(b_o\\) and \\(c_o\\) .","title":"Example 1: The quadratic function"},{"location":"tutorial/10_crafting_summary_statistics/#11-prior-over-parameters","text":"First we define a prior distribution over parameters \\(a\\) , \\(b\\) and \\(c\\) . Here, we use a uniform prior for \\(a\\) , \\(b\\) and \\(c\\) to go from \\(-1\\) to \\(1\\) . prior_min = [ - 1 , - 1 , - 1 ] prior_max = [ 1 , 1 , 1 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"1.1 Prior over parameters"},{"location":"tutorial/10_crafting_summary_statistics/#12-simulator","text":"Defining some helper functions first: def create_t_x ( theta , seed = None ): \"\"\"Return an t, x array for plotting based on params\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () t = np . linspace ( - 1 , 1 , 200 ) ts = np . repeat ( t [:, np . newaxis ], theta . shape [ 0 ], axis = 1 ) x = ( theta [:, 0 ] * ts ** 2 + theta [:, 1 ] * ts + theta [:, 2 ] + 0.01 * rng . randn ( ts . shape [ 0 ], theta . shape [ 0 ]) ) return t , x def eval ( theta , t , seed = None ): \"\"\"Evaluate the quadratic function at `t`\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () return theta [:, 0 ] * t ** 2 + theta [:, 1 ] * t + theta [:, 2 ] + 0.01 * rng . randn ( 1 ) In this example, we generate the observation \\(x_o\\) from parameters \\(\\theta_o=(a_o, b_o, c_o)=(0.3, -0.2, -0.1)\\) . The observation as follows. theta_o = np . array ([ 0.3 , - 0.2 , - 0.1 ]) t , x = create_t_x ( theta_o ) plt . plot ( t , x , \"k\" ) []","title":"1.2 Simulator"},{"location":"tutorial/10_crafting_summary_statistics/#13-summary-statistics","text":"We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . 
array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) between the observed and the simulated function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change by small amounts every time you rerun it due to the noise, unless you set the seed.","title":"1.3 Summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#14-simulating-data","text":"Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there might be no apparent reason why one method would be better than another. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones.","title":"1.4 Simulating data"},{"location":"tutorial/10_crafting_summary_statistics/#15-inference","text":"","title":"1.5 Inference"},{"location":"tutorial/10_crafting_summary_statistics/#151-using-the-mse","text":"Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve built the posterior, we can ask how likely it considers certain parameters to be, given that we tell it we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi to do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better.","title":"1.5.1 Using the MSE"},{"location":"tutorial/10_crafting_summary_statistics/#152-using-3-coordinates-as-summary-statistics","text":"x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. 
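A quick way to make the comparison above concrete is a posterior predictive overlay: simulate quadratic functions from posterior samples and plot them on top of the observed trace. A minimal sketch, assuming `create_t_x`, `theta_o`, and posterior samples `theta_p` as defined in this tutorial (it works the same for the MSE-based and the coordinate-based posterior):

```python
# Posterior predictive overlay (sketch): simulate quadratics from posterior
# samples and compare them with the observed trace. Assumes `create_t_x`,
# `theta_o`, and posterior samples `theta_p` from this tutorial.
t, x_truth = create_t_x(theta_o)
t, x_pred = create_t_x(theta_p[:100].numpy())  # a subset of posterior samples
plt.plot(t, x_pred, "grey", zorder=0)
plt.plot(t, x_truth, "k", zorder=1, label="observation")
plt.legend()
plt.show()
```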
The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself how this improves further with more training samples or you could try to see how many you\u2019d exactly need to keep having a satisfyingly looking posterior and high posterior sample simulations. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.","title":"1.5.2 Using 3 coordinates as summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#16-prior-simulations-summary-statistics-vs-observed-summary-statistics","text":"Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance.","title":"1.6 Prior simulations' summary statistics vs observed summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#17-explicit-recommendations","text":"We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. 
Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"1.7 Explicit recommendations"},{"location":"tutorial/11_sampler_interface/","text":"Sampling algorithms in sbi \u00b6 Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) Below, we will demonstrate how these samplers can be used in sbi . First, we train the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( prior = prior , show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () And then we pass the options for which sampling method to use to the build_posterior() method: # Sampling with MCMC sampling_algorithm = \"mcmc\" mcmc_method = \"slice_np\" # or nuts, or hmc posterior = inference . build_posterior ( sample_with = sampling_algorithm , mcmc_method = mcmc_method ) # Sampling with variational inference sampling_algorithm = \"vi\" vi_method = \"rKL\" # or fKL posterior = inference . build_posterior ( sample_with = sampling_algorithm , vi_method = vi_method ) # Unlike other methods, vi needs a training step for every observation. posterior = posterior . set_default_x ( x_o ) . train () # Sampling with rejection sampling sampling_algorithm = \"rejection\" posterior = inference . build_posterior ( sample_with = sampling_algorithm ) More flexibility in adjusting the sampler \u00b6 With the above syntax, you can easily try out different sampling algorithms. However, in many cases, you might want to customize your sampler. Below, we demonstrate how you can change hyperparameters of the samplers (e.g. number of warm-up steps of MCMC) or how you can write your own sampler from scratch. Main syntax (for SNLE and SNRE) \u00b6 As above, we begin by training the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 52 epochs. 
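Whichever of the build_posterior() variants shown above you pick, drawing samples afterwards looks the same. A minimal sketch, assuming the posterior object and the observation x_o from this tutorial:

```python
# Sampling works identically for the MCMC-, VI-, and rejection-based
# posteriors returned by `build_posterior()` (assumes `posterior` and `x_o`
# from above).
posterior = posterior.set_default_x(x_o)
samples = posterior.sample((1_000,))
```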
Then, for full flexibility on using the sampler, we do not use the .build_posterior() method, but instead we explicitly define the potential function and the sampling algorithm (see below for explanation): from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , warmup_steps = 10 ) If you want to use variational inference or rejection sampling, you have to replace the last line with VIPosterior or RejectionPosterior : # For VI, we have to train. posterior = VIPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) . train () posterior = RejectionPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) At this point, you could also plug the potential_fn into any sampler of your choice and not rely on any of the in-built sbi -samplers. Further explanation \u00b6 The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function of the parameter \\(f(\\theta)\\) . The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform . The parameter_transform is a fixed transform that is can be applied to parameter theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior ) Main syntax for SNPE \u00b6 SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . 
append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Sampling algorithms in sbi"},{"location":"tutorial/11_sampler_interface/#sampling-algorithms-in-sbi","text":"Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) Below, we will demonstrate how these samplers can be used in sbi . First, we train the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( prior = prior , show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () And then we pass the options for which sampling method to use to the build_posterior() method: # Sampling with MCMC sampling_algorithm = \"mcmc\" mcmc_method = \"slice_np\" # or nuts, or hmc posterior = inference . build_posterior ( sample_with = sampling_algorithm , mcmc_method = mcmc_method ) # Sampling with variational inference sampling_algorithm = \"vi\" vi_method = \"rKL\" # or fKL posterior = inference . build_posterior ( sample_with = sampling_algorithm , vi_method = vi_method ) # Unlike other methods, vi needs a training step for every observation. posterior = posterior . set_default_x ( x_o ) . train () # Sampling with rejection sampling sampling_algorithm = \"rejection\" posterior = inference . build_posterior ( sample_with = sampling_algorithm )","title":"Sampling algorithms in sbi"},{"location":"tutorial/11_sampler_interface/#more-flexibility-in-adjusting-the-sampler","text":"With the above syntax, you can easily try out different sampling algorithms. However, in many cases, you might want to customize your sampler. Below, we demonstrate how you can change hyperparameters of the samplers (e.g. number of warm-up steps of MCMC) or how you can write your own sampler from scratch.","title":"More flexibility in adjusting the sampler"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snle-and-snre","text":"As above, we begin by training the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 52 epochs. 
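As mentioned above, the potential function returned by likelihood_estimator_based_potential can also be plugged into a sampler that is not part of sbi. Below is a minimal, hypothetical random-walk Metropolis-Hastings sketch that relies only on evaluating potential_fn; for bounded priors one would additionally run it in the unconstrained space given by parameter_transform:

```python
# Hand-rolled random-walk Metropolis-Hastings (hypothetical sketch, not part
# of sbi). It only assumes that `potential_fn` evaluates
# log(p(x_o | theta) p(theta)) for a batch of parameters, as explained below.
import torch

def metropolis_hastings(potential_fn, num_samples=1_000, num_dim=2, step=0.5):
    theta = torch.zeros(1, num_dim)      # arbitrary starting point
    log_pot = potential_fn(theta)
    samples = []
    for _ in range(num_samples):         # no burn-in or thinning, for brevity
        proposal = theta + step * torch.randn_like(theta)
        log_pot_prop = potential_fn(proposal)
        # Accept with probability min(1, exp(f(proposal) - f(theta))).
        if torch.rand(1) < torch.exp(log_pot_prop - log_pot):
            theta, log_pot = proposal, log_pot_prop
        samples.append(theta.clone())
    return torch.cat(samples, dim=0)

custom_samples = metropolis_hastings(potential_fn)
```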
Then, for full flexibility on using the sampler, we do not use the .build_posterior() method, but instead we explicitly define the potential function and the sampling algorithm (see below for explanation): from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , warmup_steps = 10 ) If you want to use variational inference or rejection sampling, you have to replace the last line with VIPosterior or RejectionPosterior : # For VI, we have to train. posterior = VIPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) . train () posterior = RejectionPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) At this point, you could also plug the potential_fn into any sampler of your choice and not rely on any of the in-built sbi -samplers.","title":"Main syntax (for SNLE and SNRE)"},{"location":"tutorial/11_sampler_interface/#further-explanation","text":"The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function of the parameter \\(f(\\theta)\\) . The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform . The parameter_transform is a fixed transform that is can be applied to parameter theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior )","title":"Further explanation"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snpe","text":"SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. 
The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Main syntax for SNPE"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/","text":"Posterior Predictive Checks (PPC) in SBI \u00b6 A common safety check performed as part of inference is the Posterior Predictive Check (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that, if the inference is correct, the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d to the observed data \\(x_o\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless, a PPC is a good starting point for diagnosing the inference and can provide an intuition about any bias introduced in the inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ? Main syntax \u00b6 from sbi.analysis import pairplot # A PPC is performed after we trained our neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o ) Performing a PPC over a toy example \u00b6 Below we provide an example Posterior Predictive Check (PPC) on a toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the posterior samples \\(\\theta_{\\text{posterior}}\\) . Note that the simulation part is not in the sbi scope, so any simulator (including a non-Python one) can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . 
sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) . If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Posterior predictive checks"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#posterior-predictive-checks-ppc-in-sbi","text":"A common safety check performed as part of inference is the Posterior Predictive Check (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that, if the inference is correct, the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d to the observed data \\(x_o\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless, a PPC is a good starting point for diagnosing the inference and can provide an intuition about any bias introduced in the inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ?","title":"Posterior Predictive Checks (PPC) in SBI"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#main-syntax","text":"from sbi.analysis import pairplot # A PPC is performed after we trained our neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . 
sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o )","title":"Main syntax"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#performing-a-ppc-over-a-toy-example","text":"Below we provide an example Posterior Predictive Check (PPC) over some toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the poterior samples \\(\\theta_{\\text{posterior}}\\) . Note that the simulation part is not in the sbi scope, so any simulator -including a non-Python one- can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against the \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior* predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. **The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) * . 
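The prior predictive check just described can be sketched with the same plotting strategy. Note that prior below is a hypothetical prior object with a .sample() method; it is not defined in this toy example, while dummy_simulator, x_o, D, and pairplot are taken from above:

```python
# Prior predictive check (sketch): draw parameters from the prior instead of
# the posterior, simulate, and verify that x_o falls within the (wider)
# support. `prior` is a hypothetical stand-in; the rest is defined above.
prior_samples = prior.sample((5_000,))
x_prior_pred = dummy_simulator(prior_samples)
_ = pairplot(
    samples=x_prior_pred,
    points=x_o[0],
    limits=torch.tensor([[-2.0, 5.0]] * 5),
    points_colors="red",
    figsize=(8, 8),
    offdiag="scatter",
    scatter_offdiag=dict(marker=".", s=5),
    points_offdiag=dict(marker="+", markersize=20),
    labels=[rf"$x_{d}$" for d in range(D)],
)
```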
If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Performing a PPC over a toy example"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/","text":"Simulation-based Calibration in SBI \u00b6 After a density estimator has been trained with simulated data to obtain a posterior, the estimator should be made subject to several diagnostic tests, before being used for inference given the actual observed data. Posterior Predictive Checks (see tutorial 12) provide one way to \u201ccritique\u201d a trained estimator via its predictive performance. Another important approach to such diagnostics is simulation-based calibration as reported by Talts et al, 2018 . Simulation-based calibration (SBC) provides a (qualitative) view and a quantitive measure to check, whether the uncertainties of the posterior are balanced, i.e., neither over-confident nor under-confident. As such, SBC can be viewed as a necessary condition (but not sufficient) for a valid inference algorithm: If SBC checks fail, this tells you that your inference is invalid. If SBC checks pass, this is no guarantee that the posterior estimation is working. In a nutshell \u00b6 To run SBC, we sample theta_o_i values from the prior of the problem at hand we simulate \u201cobservations\u201d from these parameters: x_o_i = simulator(theta_o_i) we perform inference given each observation x_o_i . This produces a separate posterior \\(p_i(\\theta | x_{o,i})\\) for each of x_o_i . The key step for SBC is to generate a set of posterior samples \\(\\{\\theta\\}_i\\) from each posterior (let\u2019s call this theta_i_s , referring to s samples from posterior \\(p_i(\\theta | x_{o,i})\\) ), and to rank the corresponding theta_o_i under this set of samples. A rank is computed by counting how many samples theta_i_s fall below their corresponding theta_o_i (see section 4.1 in Talts et al.). These ranks are then used to perform the SBC check. Key ideas behind SBC \u00b6 The core idea behind SBC is two fold: SBC ranks of ground truth parameters under the inferred posterior samples follow a uniform distribution. (If the SBC ranks are not uniformly distributed, the posterior is not well calibrated.) samples from the data averaged posterior (ensemble of randomly chosen posterior samples given multiple distinct observations x_o ) are distributed according to the prior What can SBC diagnose? \u00b6 SBC can inform us whether we are not wrong. However, it cannot tell us whether we are right, i.e., SBC checks a necessary condition. For example, imagine you run SBC using the prior as a posterior. The ranks would be perfectly uniform. But the inference would be wrong. The Posterior Predictive Checks (see tutorial 12) can be seen as the complementary sufficient check for the posterior (only as a methaphor, no theoretical guarantees here). Using the prior as a posterior and then doing predictive checks would clearly show that inference failed. To summarize SBC can: tell us whether the SBI method applied to the problem at hand produces posteriors that have well-calibrated uncertainties, and if not, what kind of systematic bias it has: negative or positive bias (shift in the mean of the predictions) or over- or underdispersion (too large or too small variance) A healthy posterior \u00b6 Let\u2019s take the gaussian linear simulator from the previous tutorials and run inference with NPE on it. 
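The nutshell recipe above maps almost one-to-one onto the helpers run_sbc, check_sbc, and sbc_rank_plot that are imported in the next cell. A hedged sketch of how they are typically strung together, assuming a trained posterior as well as the prior and simulator of this tutorial:

```python
# SBC in a few lines (sketch): sample ground-truth parameters from the prior,
# simulate observations, let sbi compute the ranks, then inspect them.
# Assumes a trained `posterior`, plus `prior` and `simulator` from this tutorial.
num_sbc_runs, num_posterior_samples = 300, 1_000
thetas = prior.sample((num_sbc_runs,))
xs = simulator(thetas)
ranks, dap_samples = run_sbc(thetas, xs, posterior, num_posterior_samples=num_posterior_samples)
check_stats = check_sbc(ranks, thetas, dap_samples, num_posterior_samples=num_posterior_samples)
fig, ax = sbc_rank_plot(ranks, num_posterior_samples=num_posterior_samples, plot_type="hist")
```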
Note: SBC requires running inference several times. Using SBC with amortized methods like NPE is hence a justified endavour: repeated inference is cheap and SBC can be performed with little runtime penalty. This does not hold for sequential methods or anything relying on MCMC or VI (here, parallelization is your friend, num_workers>1 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False ) An ideal case \u00b6 To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:001 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False )","title":"A healthy posterior"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/#an-ideal-case","text":"To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . 
sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE and (S)NRE can perform inference given multiple IID obserations by using only single-trial training data (i.e., for training, we run the simulator only once per parameter set). Once the likelihood is learned on single trials (i.e., a neural network that predicts the likelihood of a single observation given a parameter set), one can sample the posterior for any number of trials. This works because, given a single-trial neural likelihood from (S)NLE or (S)NRE, we can calculate the joint likelihoods of all trials by multiplying them together (or adding them in log-space). The joint likelihood can then be plugged into MCMC or VI . sbi takes care of all of these steps, so you do not have to implement anything yourself: # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. This means that, unlike for (S)NLE and (S)NRE, (S)NPE requires to run the simulator multiple times for individual parameter sets to generate the training data. In order to implement this in sbi , \u201cunobserved\u201d trials in the training dataset have to be masked by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. 
max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-nle","text":"(S)NLE and (S)NRE can perform inference given multiple IID obserations by using only single-trial training data (i.e., for training, we run the simulator only once per parameter set). Once the likelihood is learned on single trials (i.e., a neural network that predicts the likelihood of a single observation given a parameter set), one can sample the posterior for any number of trials. This works because, given a single-trial neural likelihood from (S)NLE or (S)NRE, we can calculate the joint likelihoods of all trials by multiplying them together (or adding them in log-space). The joint likelihood can then be plugged into MCMC or VI . sbi takes care of all of these steps, so you do not have to implement anything yourself: # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). 
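To make the permutation-invariance idea concrete before turning to invariance w.r.t. the number of trials (next paragraph), here is a toy mean-pooling embedding in plain PyTorch. It is only a conceptual sketch; the tutorial itself uses sbi's FCEmbedding and PermutationInvariantEmbedding classes shown below:

```python
import torch
import torch.nn as nn

class ToyPermutationInvariantNet(nn.Module):
    """Conceptual sketch (not sbi's implementation): embed each trial
    separately, then aggregate with a mean, which is permutation invariant."""

    def __init__(self, trial_dim: int, latent_dim: int = 10):
        super().__init__()
        self.single_trial_net = nn.Sequential(
            nn.Linear(trial_dim, 40), nn.ReLU(), nn.Linear(40, latent_dim)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x has shape (batch, num_trials, trial_dim).
        embeddings = self.single_trial_net(x)  # (batch, num_trials, latent_dim)
        return embeddings.mean(dim=1)          # mean over trials: order does not matter

# Shuffling the trial dimension leaves the embedding (numerically) unchanged.
net = ToyPermutationInvariantNet(trial_dim=2)
x = torch.randn(1, 5, 2)
perm = torch.randperm(5)
assert torch.allclose(net(x), net(x[:, perm, :]), atol=1e-6)
```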
To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. This means that, unlike for (S)NLE and (S)NRE, (S)NPE requires to run the simulator multiple times for individual parameter sets to generate the training data. In order to implement this in sbi , \u201cunobserved\u201d trials in the training dataset have to be masked by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. 
As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). 
Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-nle","text":"(S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . 
from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . 
append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2 Diagnostic plots \u00b6 az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" ); Posterior density plots \u00b6 az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Density plots and MCMC diagnostics with ArviZ"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#mcmc-diagnostics-with-arviz","text":"This tutorial shows how to evaluate the quality of MCMC samples generated via sbi using the arviz package. 
Outline: 1) Train MNLE to approximate the likelihood underlying the simulator 2) Run MCMC using pyro MCMC samplers via sbi interface 3) Use arviz to visualize the posterior, predictive distributions and MCMC diagnostics. import arviz as az import torch from sbi.inference import MNLE , likelihood_estimator_based_potential from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Gamma from sbi.utils import MultipleIndependent from sbi.inference import MCMCPosterior # Seeding torch . manual_seed ( 1 ); # Toy simulator for mixed data def mixed_simulator ( theta ): beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = 2 * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # Define independent priors for each dimension. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , )","title":"MCMC diagnostics with Arviz"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#train-mnle-to-approximate-the-likelihood","text":"For this tutorial, we will use a simple simulator with two parameters. For details see the example on the decision making model . Here, we pass mcmc_method=\"nuts\" in order to use the underlying pyro No-U-turn sampler , but it would work as well with other samplers (e.g. \u201cslice_np_vectorized\u201d, \u201chmc\u201d). Additionally, when calling posterior.sample(...) we pass return_arviz=True so that the Arviz InferenceData object is returned. This object gives us access to the wealth of MCMC diagnostics tool provided by arviz . # Generate training data and train MNLE. num_simulations = 10000 theta = prior . sample (( num_simulations ,)) x = mixed_simulator ( theta ) trainer = MNLE ( prior ) likelihood_estimator = trainer . append_simulations ( theta , x ) . train () /Users/janbolts/qode/sbi/sbi/neural_nets/mnle.py:60: UserWarning: The mixed neural likelihood estimator assumes that x contains continuous data in the first n-1 columns (e.g., reaction times) and categorical data in the last column (e.g., corresponding choices). If this is not the case for the passed `x` do not use this function. warnings.warn( Neural network successfully converged after 65 epochs.","title":"Train MNLE to approximate the likelihood"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#run-pyro-nuts-mcmc-and-obtain-arviz-inferencedata-object","text":"# Simulate \"observed\" data x_o torch . manual_seed ( 42 ) num_trials = 100 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) # Set MCMC parameters and run Pyro NUTS. mcmc_parameters = dict ( num_chains = 4 , thin = 5 , warmup_steps = 50 , init_strategy = \"proposal\" , method = \"nuts\" , ) num_samples = 1000 # get the potential function and parameter transform for constructing the posterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) mnle_posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , ** mcmc_parameters ) mnle_samples = mnle_posterior . sample ( ( num_samples ,), x = x_o , show_progress_bars = False ) # get arviz InferenceData object from posterior inference_data = mnle_posterior . 
get_arviz_inference_data () /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:280: UserWarning: An x with a batch size of 100 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn(","title":"Run Pyro NUTS MCMC and obtain arviz InferenceData object"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#generate-arviz-plots","text":"The resulting InferenceData object can be passed to most arviz plotting functions, and there are plenty see here for an overview. To get a better understanding of the InferenceData object see here . Below and overview of common MCMC diagnostics plot, see the corresponding arviz documentation for interpretation of the plots. We will a full use-case using the SBI-MCMC-arviz workflow soon. print ( inference_data . posterior ) Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2","title":"Generate arviz plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#diagnostic-plots","text":"az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" );","title":"Diagnostic plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#posterior-density-plots","text":"az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Posterior density plots"},{"location":"tutorial/16_implemented_methods/","text":"API of implemented methods \u00b6 This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities Posterior estimation (SNPE) \u00b6 Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . 
train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" ) Likelihood estimation (SNLE) \u00b6 Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) Likelihood-ratio estimation (SNRE) \u00b6 Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Utilities \u00b6 Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Implemented algorithms"},{"location":"tutorial/16_implemented_methods/#api-of-implemented-methods","text":"This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities","title":"API of implemented methods"},{"location":"tutorial/16_implemented_methods/#posterior-estimation-snpe","text":"Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . 
sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" )","title":"Posterior estimation (SNPE)"},{"location":"tutorial/16_implemented_methods/#likelihood-estimation-snle","text":"Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood estimation (SNLE)"},{"location":"tutorial/16_implemented_methods/#likelihood-ratio-estimation-snre","text":"Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . 
append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood-ratio estimation (SNRE)"},{"location":"tutorial/16_implemented_methods/#utilities","text":"Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Utilities"},{"location":"tutorial/17_SBI_for_models_of_decision_making/","text":"SBI with mixed data, iid data, and experimental conditions \u00b6 For a general tutorial on using SBI with trial-based iid data, see tutorial 14 . Here, we cover the use-case often occurring in models of decision-making: trial-based data with mixed data types and varying experimental conditions. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. 
However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . MNLE was recently added to sbi (see this PR and also issue ) and follow the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. 
class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). 
warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 https://sbi-dev.github.io/sbi/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/citation/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/code_of_conduct/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/contribute/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/credits/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/install/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/reference/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/examples/00_HH_simulator/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/examples/01_decision_making_model/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_01/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_02/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_03/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_04/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_05/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_06/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_07/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/00_getting_started/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/01_gaussian_amortized/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/02_flexible_interface/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/03_multiround_inference/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/04_density_estimators/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/05_embedding_net/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/07_conditional_distributions/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/08_restriction_estimator/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/09_sensitivity_analysis/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/10_crafting_summary_statistics/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/11_sampler_interface/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/12_diagnostics_posterior_predictive_check/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/13_diagnostics_simulation_based_calibration/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/14_iid_data_and_permutation_invariant_embeddings/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/14_iid_data_and_permutation_invarient_embeddings/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/15_mcmc_diagnostics_with_arviz/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/16_implemented_methods/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/17_SBI_for_models_of_decision_making/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/17_vi_posteriors/ - 2023-11-02 + 2023-11-03 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 67e4defa53a92e3131362ec4958e9b01a42d18cc..1659e99043d4037b84ce97d55d2e0f22fbbdb084 100644 GIT binary patch delta 30 mcmeBR?O@8;l$*yb{keGjAQ#v^+eIiBRrev>N6zyJW0-wH_p delta 30 
mcmeBR?O@8;l`e#&_w`yNKojYsw{a-?PiM5jtJFaQ9N4hb3n diff --git a/tutorial/00_getting_started/index.html b/tutorial/00_getting_started/index.html index ab6c7dd1c..55f05d55a 100644 --- a/tutorial/00_getting_started/index.html +++ b/tutorial/00_getting_started/index.html @@ -346,47 +346,6 @@ Next steps - - -
  • - - Requirements for the simulator, prior, and observation - - - - -
  • - -
  • - - Running different algorithms - -
  • @@ -403,20 +362,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -432,8 +377,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -507,8 +452,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -549,8 +494,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -563,8 +508,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -577,8 +522,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • @@ -941,47 +886,6 @@ Next steps - - -
  • - - Requirements for the simulator, prior, and observation - - - - -
  • - -
  • - - Running different algorithms - -
  • @@ -1017,12 +921,12 @@

    Running the inference procedure
    num_dim = 3
     prior = utils.BoxUniform(low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim))
     
    -
     def simulator(parameter_set):
         return 1.0 + parameter_set + torch.randn(parameter_set.shape) * 0.1
     

    sbi can then run inference:

    -
    posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)
    +
    # Other methods are "SNLE" or "SNRE".
    +posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)
     
    Running 1000 simulations.:   0%|          | 0/1000 [00:00<?, ?it/s]
     
    @@ -1043,32 +947,7 @@ 

Running the inference procedure [figure: png]

    Next steps

    -

    The single-line interface described above provides an easy entry for using sbi. However, if you are working on a larger project or need additional features, we strongly recommend using the flexible interface.

    -

    Requirements for the simulator, prior, and observation

    -

    In the interface described above, you need to provide a prior and a simulator for training. Let’s talk about what requirements they need to satisfy.

    -

    Prior

    -

A prior is a distribution object from which parameter sets can be sampled. Any class is allowed as the prior as long as it provides prior.sample() and prior.log_prob().
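For example, the BoxUniform prior used earlier in this tutorial already satisfies this interface; a minimal check (a sketch assuming three parameters) could look like this:

import torch
from sbi import utils

# A box-uniform prior over three parameters; it exposes .sample() and
# .log_prob(), which is all sbi requires of a prior object.
prior = utils.BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

theta = prior.sample((5,))        # tensor of shape (5, 3)
log_prob = prior.log_prob(theta)  # tensor of shape (5,)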

    -

    Simulator

    -

    The simulator is a Python callable that takes in a parameter set and outputs data with some (even if very small) stochasticity.

    -

    Allowed data types and shapes for input and output:

    -
      -
    • the input parameter set and the output have to be either a np.ndarray or a torch.Tensor.
    • -
    • the input parameter set should have either shape (1,N) or (N), and the output must have shape (1,M) or (M).
    • -
    -

    You can call simulators not written in Python as long as you wrap them in a Python function.
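As an illustration of such a wrapper (a sketch only: the executable name my_external_simulator and its input/output convention are made up here), one could shell out to an external program and convert its output back to a tensor:

import subprocess
import torch

def wrapped_simulator(parameter_set: torch.Tensor) -> torch.Tensor:
    # Pass the parameters as command-line arguments to the (hypothetical)
    # external program, which prints whitespace-separated floats.
    args = [str(value) for value in parameter_set.flatten().tolist()]
    result = subprocess.run(
        ["./my_external_simulator", *args],
        capture_output=True, text=True, check=True,
    )
    x = [float(value) for value in result.stdout.split()]
    return torch.tensor(x, dtype=torch.float32).reshape(1, -1)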

    -

    Observation

    -

    Once you have a trained posterior, you will want to evaluate or sample the posterior \(p(\theta|x_o)\) at certain observed values \(x_o\):

    -
      -
    • The allowable data types are either Numpy np.ndarray or a torch torch.Tensor.
    • -
    • The shape must be either (1,M) or just (M).
    • -
    -
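Putting these requirements together (a sketch that assumes the posterior trained earlier in this tutorial and the 3-dimensional simulator output; the observation values are arbitrary):

import torch

# x_o must be a np.ndarray or torch.Tensor of shape (1, M) or (M).
x_o = torch.as_tensor([[0.8, 0.4, -0.2]])
samples = posterior.sample((1000,), x=x_o)            # 1000 posterior samples
log_probability = posterior.log_prob(samples, x=x_o)  # log-density at those samples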

    Running different algorithms

    -

    sbi implements three classes of algorithms that can be used to obtain the posterior distribution: SNPE, SNLE, and SNRE. You can try the different algorithms by simply swapping out the method:

    -
    posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)
    -posterior = infer(simulator, prior, method="SNLE", num_simulations=1000)
    -posterior = infer(simulator, prior, method="SNRE", num_simulations=1000)
    -
    -

    You can then infer, sample, evaluate, and plot the posterior as described above.

    +

The single-line interface described above provides an easy entry point for using sbi. However, for almost any real-world problem that goes beyond a simple demonstration, we strongly recommend using the flexible interface.
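In the flexible interface, the single infer() call is replaced by explicit steps. Roughly (a sketch using the same prior and simulator as above, shown with SNPE; x_o denotes the observation defined earlier):

from sbi.inference import SNPE, simulate_for_sbi

inference = SNPE(prior)
theta, x = simulate_for_sbi(simulator, prior, num_simulations=1000)
density_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(density_estimator)
samples = posterior.sample((1000,), x=x_o)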

    @@ -1104,13 +983,13 @@

    Running different algorithms diff --git a/tutorial/09_sensitivity_analysis/index.html b/tutorial/09_sensitivity_analysis/index.html index b0e691433..b7a80d3cf 100644 --- a/tutorial/09_sensitivity_analysis/index.html +++ b/tutorial/09_sensitivity_analysis/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -415,8 +401,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -457,8 +443,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -471,8 +457,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -485,8 +471,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • diff --git a/tutorial/10_crafting_summary_statistics/index.html b/tutorial/10_crafting_summary_statistics/index.html index d842e288a..f69c3a25c 100644 --- a/tutorial/10_crafting_summary_statistics/index.html +++ b/tutorial/10_crafting_summary_statistics/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -417,8 +403,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -458,6 +444,20 @@ +
  • + + SBI with trial-based data + +
  • + + + + + + + + +
  • Handling invalid simulations @@ -491,20 +491,6 @@ - - - - - -
  • - - SBI with trial-based data - -
  • - - - - @@ -1167,13 +1153,13 @@

    1.7 Explicit recommendations @@ -933,7 +844,7 @@ -

    The sampler interface

    +

    Sampling algorithms in sbi

    Note: this tutorial requires that the user is already familiar with the flexible interface.

sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g., Markov chain Monte Carlo (MCMC). In sbi, the implemented samplers are:
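Whichever sampler from that list is chosen, the explicit pattern looks roughly like the following (a sketch based on the potential-function API that also appears later in this document; likelihood_estimator is assumed to be a trained SNLE network and x_o the observation):

from sbi.inference import MCMCPosterior, likelihood_estimator_based_potential

potential_fn, parameter_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)
posterior = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=parameter_transform,
    method="slice_np_vectorized",
)
samples = posterior.sample((1000,))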

    -
    diff --git a/tutorial/13_diagnostics_simulation_based_calibration/index.html b/tutorial/13_diagnostics_simulation_based_calibration/index.html index ade247774..8bba7e91b 100644 --- a/tutorial/13_diagnostics_simulation_based_calibration/index.html +++ b/tutorial/13_diagnostics_simulation_based_calibration/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -415,8 +401,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -457,8 +443,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -471,8 +457,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -485,8 +471,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • diff --git a/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html b/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html index 538b9e998..3a319fd50 100644 --- a/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html +++ b/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -417,8 +403,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -457,34 +443,6 @@ - -
  • - - Handling invalid simulations - -
  • - - - - - - - - - -
  • - - Crafting summary statistics - -
  • - - - - - - - - @@ -612,6 +570,34 @@ + + + + + +
  • + + Handling invalid simulations + +
  • + + + + + + + + + +
  • + + Crafting summary statistics + +
  • + + + + @@ -1057,30 +1043,29 @@

    SBI with iid data and permutation-invariant embeddings

    There are scenarios in which we observe multiple data points per experiment and we can assume that they are independent and identically distributed (iid, i.e., they are assumed to have the same underlying model parameters). -For example, in a decision-making experiments, the experiment is often repeated in trials with the same experimental settings and conditions. The corresponding set of trials is then assumed to be “iid”. +For example, in decision-making experiments, the experiment is often repeated in trials with the same experimental settings and conditions. The corresponding set of trials is then assumed to be “iid” given a single parameter set. In such a scenario, we may want to obtain the posterior given a set of observation \(p(\theta | X=\{x_i\}_i^N)\).

    Amortization of neural network training: iid-inference with NLE / NRE

    -

    For some SBI variants the iid assumption can be exploited: when using a likelihood-based SBI method (SNLE, SNRE) one can train the density or ratio estimator on single-trial data, and then perform inference with MCMC. Crucially, because the data is iid and the estimator is trained on single-trial data, one can repeat the inference with a different x_o (a different set of trials, or different number of trials) without having to retrain the density estimator. One can interpet this as amortization of the SBI training: we can obtain a neural likelihood, or likelihood-ratio estimate for new x_os without retraining, but we still have to run MCMC or VI to do inference.

    -

    In addition, one can not only change the number of trials of a new x_o, but also the entire inference setting. -For example, one can apply hierarchical inference scenarios with changing hierarchical denpendencies between the model parameters–all without having to retrain the density estimator because that is based on estimating single-trail likelihoods.

    +

    For some SBI variants the iid assumption can be exploited: when using a likelihood-based SBI method (SNLE, SNRE) one can train the density or ratio estimator on single-trial data, and then perform inference with MCMC or variational inference (VI). Crucially, because the data is iid and the estimator is trained on single-trial data, one can repeat the inference with a different x_o (a different set of trials, or different number of trials) without having to retrain the density estimator. One can interpet this as amortization of the SBI training: we can obtain a neural likelihood, or likelihood-ratio estimate for new x_os without retraining, but we still have to run MCMC or VI to do inference.

    +

In addition, one can not only change the number of trials of a new x_o, but also the entire inference setting. -For example, one can apply hierarchical inference with changing hierarchical dependencies between the model parameters–all without having to retrain the density estimator because it estimates single-trial likelihoods.
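As a small illustration of this amortization of training (a sketch reusing the simulator, prior, theta_o, and the SNLE trainer inferer that appear in the code further below; the trial counts are arbitrary): once the single-trial likelihood estimator is trained, the same posterior object can be conditioned on observations with different numbers of trials without retraining.

posterior = inferer.build_posterior()

x_o_5_trials = simulator(theta_o.repeat(5, 1))    # 5 iid trials
x_o_20_trials = simulator(theta_o.repeat(20, 1))  # 20 iid trials

samples_5 = posterior.sample((1000,), x=x_o_5_trials)
samples_20 = posterior.sample((1000,), x=x_o_20_trials)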

    Full amortization: iid-inference with NPE and permutation-invariant embedding nets

    -

    When performing neural posterior estimation (SNPE) we cannot exploit the iid assumption directly because we are learning a density estimator in theta. +

    When performing neural posterior estimation (SNPE) we cannot exploit the iid assumption directly. Thus, the underlying neural network takes x as input and predicts the parameters of the density estimator. -As a consequence, if x is a set of iid observations \(X=\{x_i\}_i^N\) then the neural network has to be invariant to permutations of this set, i.e., it has to be permutation invariant. -Overall, this means that we can use SNPE for inference with iid data, however, we need to provide a corresponding embedding network that handles the iid-data and is permutation invariant. -This will likely require some hyperparameter tuning and more training data for the inference to work accurately. But once we have this, the inference is fully amortized, i.e., we can get new posterior samples basically instantly without retraining and without running MCMC or VI.

    -

    Let us first have a look how trial-based inference works in SBI before we discuss models with “mixed data types”.

    +As a consequence, if x is a set of iid observations \(X=\{x_i\}_i^N\) then the neural network has to be invariant to permutations of this set, i.e., it has to be permutation invariant. In addition, the neural network has to be able to consume a varying number of iid datapoints in order to be amortized over the number of trials. +Therefore, in order to use SNPE for inference on iid data, we need to provide a corresponding embedding network that handles the iid-data. +This will likely require some hyperparameter tuning and more training data for inference to work accurately. But once we have this, inference is fully amortized, i.e., we can get new posterior samples almost instantly without retraining and without running MCMC or VI.

    SBI with trial-based data

    -

    For illustration we use a simple linear Gaussian simulator, as in previous tutorials. The simulator takes a single parameter (vector), the mean of the Gaussian, and its variance is set to one. -We define a Gaussian prior over the mean and perform inference. -The observed data is again a from a Gaussian with some fixed “ground-truth” parameter \(\theta_o\). -Crucially, the observed data x_o can consist of multiple samples given the same ground-truth parameters and these samples are then iid:

    +

    For illustration, we use a simple linear Gaussian simulator, as in previous tutorials. The simulator takes a single parameter (vector) which is the mean of a Gaussian. The simulator then adds noise with a fixed variance (set to one). +We define a Gaussian prior over the mean and perform inference.

    +

    The observed data is also sampled from a Gaussian with some fixed “ground-truth” parameter \(\theta_o\). +Crucially, the observed data x_o can consist of multiple samples given the same ground-truth parameters and these samples are iid given \(\theta\):

    \[ \theta \sim \mathcal{N}(\mu_0,\; \Sigma_0) \\ x | \theta \sim \mathcal{N}(\theta,\; \Sigma=I) \\ \mathbf{x_o} = \{x_o^i\}_{i=1}^N \sim \mathcal{N}(\theta_o,\; \Sigma=I) \]
    -

    For this toy problem the ground-truth posterior is well defined, it is again a Gaussian, centered on the mean of \(\mathbf{x_o}\) and with variance scaled by the number of trials \(N\), i.e., the more trials we observe, the more information about the underlying \(\theta_o\) we have and the more concentrated the posteriors becomes.

    +

    For this toy problem, the ground-truth posterior is well defined, it is again a Gaussian, centered on the mean of \(\mathbf{x_o}\) and with variance scaled by the number of trials \(N\), i.e., the more trials we observe, the more information about the underlying \(\theta_o\) we have and the more concentrated the posteriors becomes.

    We will illustrate this below:

    import torch
     import matplotlib.pyplot as plt
    @@ -1156,8 +1141,7 @@ 

    Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.

    IID inference with NLE

    -

    (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples.

    -

    MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods (sbi takes care of that).

    +

(S)NLE and (S)NRE can perform inference given multiple IID observations by using only single-trial training data (i.e., for training, we run the simulator only once per parameter set). Once the likelihood is learned on single trials (i.e., a neural network that predicts the likelihood of a single observation given a parameter set), one can sample the posterior for any number of trials. This works because, given a single-trial neural likelihood from (S)NLE or (S)NRE, we can calculate the joint likelihood of all trials by multiplying the single-trial likelihoods together (or adding them in log-space). The joint likelihood can then be plugged into MCMC or VI. sbi takes care of all of these steps, so you do not have to implement anything yourself:

    # Train SNLE.
     inferer = SNLE(prior, show_progress_bars=True, density_estimator="mdn")
     theta, x = simulate_for_sbi(simulator, prior, 10000, simulation_batch_size=1000)
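 # Possible continuation (a sketch with assumed variable names, not the tutorial's original code):
 # the single-trial likelihood learned here can be reused for any number of iid trials.
 likelihood_estimator = inferer.append_simulations(theta, x).train()
 posterior = inferer.build_posterior()  # MCMC-based posterior by default for (S)NLE
 # An observation made of 20 iid trials from one ground-truth parameter (assumes a batched simulator).
 theta_o = prior.sample((1,))
 x_o_20_trials = simulator(theta_o.repeat(20, 1))
 # sbi multiplies the 20 single-trial likelihoods internally when running MCMC.
 samples = posterior.sample((1000,), x=x_o_20_trials)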
    @@ -1265,10 +1249,11 @@ 

    IID inference with NLE

    IID inference with NPE using permutation-invariant embedding nets

    -

For NPE, we need to define an embedding net that handles the set-like structure of iid data, i.e., one that is permutation invariant and can handle a varying number of trials.

    +

For NPE, we need to define an embedding net that handles the set-like structure of iid data, i.e., one that is permutation invariant and can handle a varying number of trials.

We implemented several embedding net classes that allow you to construct such a permutation- and number-of-trials-invariant embedding net.

    To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021).
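To make the idea concrete, here is a minimal, illustrative sketch in plain PyTorch (the class name IidEmbeddingNet is made up for this example; it is not one of sbi's own embedding-net classes, which additionally handle the NaN masking described below):

import torch
import torch.nn as nn

class IidEmbeddingNet(nn.Module):
    """Embeds each trial separately, then pools over trials with a permutation-invariant mean."""

    def __init__(self, trial_dim=1, embed_dim=10):
        super().__init__()
        # Shared network applied to every single trial.
        self.single_trial_net = nn.Sequential(
            nn.Linear(trial_dim, 20), nn.ReLU(), nn.Linear(20, embed_dim)
        )

    def forward(self, x):
        # x has shape (batch, num_trials, trial_dim).
        single_trial_embeddings = self.single_trial_net(x)
        # Averaging over the trial dimension makes the output independent of trial order.
        return single_trial_embeddings.mean(dim=1)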

    -

To become invariant w.r.t. the number of trials, we train the net with a varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which “unobserved” trials are masked by NaNs (and ignore the resulting SBI warning about NaNs in the training data).

    +

To become invariant w.r.t. the number of trials, we train the net with a varying number of trials for each parameter setting. This means that, unlike for (S)NLE and (S)NRE, (S)NPE requires running the simulator multiple times per parameter set to generate the training data.

    +

In order to implement this in sbi, “unobserved” trials in the training dataset have to be masked by NaNs (the resulting SBI warning about NaNs in the training data can be ignored).
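Conceptually, the padding step could look like the following sketch (variable names are illustrative and it reuses the prior and simulator defined earlier; the tutorial's own construction of the training set follows below):

import torch

max_num_trials = 20
num_training_samples = 5000
theta = prior.sample((num_training_samples,))

# Simulate the maximum number of trials for every parameter set ...
x = torch.stack([simulator(theta) for _ in range(max_num_trials)], dim=1)

# ... then mask a random number of trailing "unobserved" trials with NaNs.
num_observed = torch.randint(1, max_num_trials + 1, (num_training_samples,))
for i, n in enumerate(num_observed):
    x[i, int(n):, :] = float("nan")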

    Construct training data set.

    # we need to fix the maximum number of trials.
     max_num_trials = 20
    @@ -1486,7 +1471,7 @@ 

    Amortized inference -

    Previous - Crafting summary statistics + Learning summary statistics

    - diff --git a/tutorial/17_SBI_for_models_of_decision_making/index.html b/tutorial/17_SBI_for_models_of_decision_making/index.html index 19b0125ed..fafdb05ac 100644 --- a/tutorial/17_SBI_for_models_of_decision_making/index.html +++ b/tutorial/17_SBI_for_models_of_decision_making/index.html @@ -309,20 +309,6 @@ -
[navigation sidebar diff: the left-hand tutorial menu is reordered and renamed, e.g. the "Sampler interface" entry becomes "Amortized inference", "Using Variational Inference for Building Posteriors" becomes "Sampling algorithms in sbi", and "SBI with trial-based data", "Handling invalid simulations", and "Crafting summary statistics" are reordered]
diff --git a/tutorial/17_vi_posteriors/index.html b/tutorial/17_vi_posteriors/index.html index 640b5ddfd..525bd9084 100644 --- a/tutorial/17_vi_posteriors/index.html +++ b/tutorial/17_vi_posteriors/index.html
[navigation sidebar diff: the same reordering and renaming of tutorial entries]
@@ -920,7 +853,7 @@

    Using Variational Inference for Building Posteriors

    -

    In the previous tutorial, we saw how to build the posterior and how to specialize on one specific observation x_o. If one uses SNPE, then the posterior can be sampled from directly, yet this comes at the expense of necessary correction terms during training, since the samples are obtained from the “wrong” prior for num_rounds > 1. For SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to directly sample from the posterior without any corrections during training or without expensive MCMC for sampling. This is possible by learning the posterior with variational inference techniques. For this, an additional network (one for the likelihood or likelihood-to-evidence-ratio) must be trained first.

    +

If one uses SNPE, then the posterior can be sampled from directly (without MCMC). In contrast, for SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to sample from the posterior directly, without corrections during training and without expensive MCMC at sampling time. This is achieved by learning the posterior with variational inference techniques. To do so, an additional network (one for the likelihood or the likelihood-to-evidence ratio) must be trained first.
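Roughly, and assuming an sbi version in which build_posterior accepts sample_with="vi", as well as a prior, training data (theta, x), and an observation x_o as in the previous tutorials, the workflow looks like the following sketch (the exact, authoritative calls are given under Main syntax below):

# Sketch only -- see "Main syntax" below for the exact calls.
inference = SNLE(prior)
likelihood_estimator = inference.append_simulations(theta, x).train()

# Build a variational posterior instead of an MCMC-based one.
posterior = inference.build_posterior(sample_with="vi", vi_method="rKL")
posterior = posterior.set_default_x(x_o)
posterior.train()                    # fit q(theta | x_o) by variational inference
samples = posterior.sample((1000,))  # fast sampling, no MCMC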

    Main syntax